arm_cfft_radix4_q15.c 53 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815
  1. /* ----------------------------------------------------------------------
  2. * Project: CMSIS DSP Library
  3. * Title: arm_cfft_radix4_q15.c
  4. * Description: This file has the function definitions of the Radix-4 FFT & IFFT functions and
  5. * in-place bit reversal using a bit reversal table
  6. *
  7. * $Date: 23 April 2021
  8. * $Revision: V1.9.0
  9. *
  10. * Target Processor: Cortex-M and Cortex-A cores
  11. * -------------------------------------------------------------------- */
  12. /*
  13. * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  14. *
  15. * SPDX-License-Identifier: Apache-2.0
  16. *
  17. * Licensed under the Apache License, Version 2.0 (the License); you may
  18. * not use this file except in compliance with the License.
  19. * You may obtain a copy of the License at
  20. *
  21. * www.apache.org/licenses/LICENSE-2.0
  22. *
  23. * Unless required by applicable law or agreed to in writing, software
  24. * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  25. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  26. * See the License for the specific language governing permissions and
  27. * limitations under the License.
  28. */
  29. #include "dsp/transform_functions.h"
  30. void arm_radix4_butterfly_q15(
  31. q15_t * pSrc16,
  32. uint32_t fftLen,
  33. const q15_t * pCoef16,
  34. uint32_t twidCoefModifier);
  35. void arm_radix4_butterfly_inverse_q15(
  36. q15_t * pSrc16,
  37. uint32_t fftLen,
  38. const q15_t * pCoef16,
  39. uint32_t twidCoefModifier);
  40. void arm_bitreversal_q15(
  41. q15_t * pSrc,
  42. uint32_t fftLen,
  43. uint16_t bitRevFactor,
  44. const uint16_t * pBitRevTab);
  45. /**
  46. @addtogroup ComplexFFTDeprecated
  47. @{
  48. */
  49. /**
  50. @brief Processing function for the Q15 CFFT/CIFFT.
  51. @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q15 and will be removed in the future.
  52. @param[in] S points to an instance of the Q15 CFFT/CIFFT structure.
  53. @param[in,out] pSrc points to the complex data buffer. Processing occurs in-place.
  54. @par Input and output formats:
  55. Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
  56. Hence the output format is different for different FFT sizes.
  57. The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT:
  58. @par
  59. | CFFT Size | Input format | Output format | Number of bits to upscale |
  60. | --------: | ------------: | ------------: | ------------------------: |
  61. | 16 | 1.15 | 5.11 | 4 |
  62. | 64 | 1.15 | 7.9 | 6 |
  63. | 256 | 1.15 | 9.7 | 8 |
  64. | 1024 | 1.15 | 11.5 | 10 |
  65. | CIFFT Size | Input format | Output format | Number of bits to upscale |
  66. | ---------: | ------------: | ------------: | ------------------------: |
  67. | 16 | 1.15 | 5.11 | 0 |
  68. | 64 | 1.15 | 7.9 | 0 |
  69. | 256 | 1.15 | 9.7 | 0 |
  70. | 1024 | 1.15 | 11.5 | 0 |
  71. */
  72. void arm_cfft_radix4_q15(
  73. const arm_cfft_radix4_instance_q15 * S,
  74. q15_t * pSrc)
  75. {
  76. if (S->ifftFlag == 1U)
  77. {
  78. /* Complex IFFT radix-4 */
  79. arm_radix4_butterfly_inverse_q15(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
  80. }
  81. else
  82. {
  83. /* Complex FFT radix-4 */
  84. arm_radix4_butterfly_q15(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
  85. }
  86. if (S->bitReverseFlag == 1U)
  87. {
  88. /* Bit Reversal */
  89. arm_bitreversal_q15(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
  90. }
  91. }
  92. /**
  93. @} end of ComplexFFTDeprecated group
  94. */
  95. /*
  96. * Radix-4 FFT algorithm used is :
  97. *
  98. * Input real and imaginary data:
  99. * x(n) = xa + j * ya
  100. * x(n+N/4 ) = xb + j * yb
  101. * x(n+N/2 ) = xc + j * yc
  102. * x(n+3N/4) = xd + j * yd
  103. *
  104. *
  105. * Output real and imaginary data:
  106. * x(4r) = xa'+ j * ya'
  107. * x(4r+1) = xb'+ j * yb'
  108. * x(4r+2) = xc'+ j * yc'
  109. * x(4r+3) = xd'+ j * yd'
  110. *
  111. *
  112. * Twiddle factors for radix-4 FFT:
  113. * Wn = co1 + j * (- si1)
  114. * W2n = co2 + j * (- si2)
  115. * W3n = co3 + j * (- si3)
  116. * The real and imaginary output values for the radix-4 butterfly are
  117. * xa' = xa + xb + xc + xd
  118. * ya' = ya + yb + yc + yd
  119. * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)
  120. * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)
  121. * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)
  122. * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)
  123. * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)
  124. * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)
  125. *
  126. */
  127. /**
  128. @brief Core function for the Q15 CFFT butterfly process.
  129. @param[in,out] pSrc16 points to the in-place buffer of Q15 data type
  130. @param[in] fftLen length of the FFT
  131. @param[in] pCoef16 points to twiddle coefficient buffer
  132. @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
  133. */
  134. void arm_radix4_butterfly_q15(
  135. q15_t * pSrc16,
  136. uint32_t fftLen,
  137. const q15_t * pCoef16,
  138. uint32_t twidCoefModifier)
  139. {
  140. #if defined (ARM_MATH_DSP)
  141. q31_t R, S, T, U;
  142. q31_t C1, C2, C3, out1, out2;
  143. uint32_t n1, n2, ic, i0, j, k;
  144. q15_t *ptr1;
  145. q15_t *pSi0;
  146. q15_t *pSi1;
  147. q15_t *pSi2;
  148. q15_t *pSi3;
  149. q31_t xaya, xbyb, xcyc, xdyd;
  150. /* Total process is divided into three stages */
  151. /* process first stage, middle stages, & last stage */
  152. /* Initializations for the first stage */
  153. n2 = fftLen;
  154. n1 = n2;
  155. /* n2 = fftLen/4 */
  156. n2 >>= 2U;
  157. /* Index for twiddle coefficient */
  158. ic = 0U;
  159. /* Index for input read and output write */
  160. j = n2;
  161. pSi0 = pSrc16;
  162. pSi1 = pSi0 + 2 * n2;
  163. pSi2 = pSi1 + 2 * n2;
  164. pSi3 = pSi2 + 2 * n2;
  165. /* Input is in 1.15(q15) format */
  166. /* start of first stage process */
  167. do
  168. {
  169. /* Butterfly implementation */
  170. /* Reading i0, i0+fftLen/2 inputs */
  171. /* Read ya (real), xa(imag) input */
  172. T = read_q15x2 (pSi0);
  173. T = __SHADD16(T, 0); /* this is just a SIMD arithmetic shift right by 1 */
  174. T = __SHADD16(T, 0); /* it turns out doing this twice is 2 cycles, the alternative takes 3 cycles */
  175. /*
  176. in = ((int16_t) (T & 0xFFFF)) >> 2; // alternative code that takes 3 cycles
  177. T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
  178. */
  179. /* Read yc (real), xc(imag) input */
  180. S = read_q15x2 (pSi2);
  181. S = __SHADD16(S, 0);
  182. S = __SHADD16(S, 0);
  183. /* R = packed((ya + yc), (xa + xc) ) */
  184. R = __QADD16(T, S);
  185. /* S = packed((ya - yc), (xa - xc) ) */
  186. S = __QSUB16(T, S);
  187. /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
  188. /* Read yb (real), xb(imag) input */
  189. T = read_q15x2 (pSi1);
  190. T = __SHADD16(T, 0);
  191. T = __SHADD16(T, 0);
  192. /* Read yd (real), xd(imag) input */
  193. U = read_q15x2 (pSi3);
  194. U = __SHADD16(U, 0);
  195. U = __SHADD16(U, 0);
  196. /* T = packed((yb + yd), (xb + xd) ) */
  197. T = __QADD16(T, U);
  198. /* writing the butterfly processed i0 sample */
  199. /* xa' = xa + xb + xc + xd */
  200. /* ya' = ya + yb + yc + yd */
  201. write_q15x2_ia (&pSi0, __SHADD16(R, T));
  202. /* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */
  203. R = __QSUB16(R, T);
  204. /* co2 & si2 are read from SIMD Coefficient pointer */
  205. C2 = read_q15x2 ((q15_t *) pCoef16 + (4U * ic));
  206. #ifndef ARM_MATH_BIG_ENDIAN
  207. /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
  208. out1 = __SMUAD(C2, R) >> 16U;
  209. /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
  210. out2 = __SMUSDX(C2, R);
  211. #else
  212. /* xc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
  213. out1 = __SMUSDX(R, C2) >> 16U;
  214. /* yc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
  215. out2 = __SMUAD(C2, R);
  216. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  217. /* Reading i0+fftLen/4 */
  218. /* T = packed(yb, xb) */
  219. T = read_q15x2 (pSi1);
  220. T = __SHADD16(T, 0);
  221. T = __SHADD16(T, 0);
  222. /* writing the butterfly processed i0 + fftLen/4 sample */
  223. /* writing output(xc', yc') in little endian format */
  224. write_q15x2_ia (&pSi1, (q31_t) __PKHBT( out1, out2, 0 ));
  225. /* Butterfly calculations */
  226. /* U = packed(yd, xd) */
  227. U = read_q15x2 (pSi3);
  228. U = __SHADD16(U, 0);
  229. U = __SHADD16(U, 0);
  230. /* T = packed(yb-yd, xb-xd) */
  231. T = __QSUB16(T, U);
  232. #ifndef ARM_MATH_BIG_ENDIAN
  233. /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
  234. R = __QASX(S, T);
  235. /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
  236. S = __QSAX(S, T);
  237. #else
  238. /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
  239. R = __QSAX(S, T);
  240. /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
  241. S = __QASX(S, T);
  242. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  243. /* co1 & si1 are read from SIMD Coefficient pointer */
  244. C1 = read_q15x2 ((q15_t *) pCoef16 + (2U * ic));
  245. /* Butterfly process for the i0+fftLen/2 sample */
  246. #ifndef ARM_MATH_BIG_ENDIAN
  247. /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
  248. out1 = __SMUAD(C1, S) >> 16U;
  249. /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
  250. out2 = __SMUSDX(C1, S);
  251. #else
  252. /* xb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
  253. out1 = __SMUSDX(S, C1) >> 16U;
  254. /* yb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
  255. out2 = __SMUAD(C1, S);
  256. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  257. /* writing output(xb', yb') in little endian format */
  258. write_q15x2_ia (&pSi2, __PKHBT( out1, out2, 0 ));
  259. /* co3 & si3 are read from SIMD Coefficient pointer */
  260. C3 = read_q15x2 ((q15_t *) pCoef16 + (6U * ic));
  261. /* Butterfly process for the i0+3fftLen/4 sample */
  262. #ifndef ARM_MATH_BIG_ENDIAN
  263. /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
  264. out1 = __SMUAD(C3, R) >> 16U;
  265. /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
  266. out2 = __SMUSDX(C3, R);
  267. #else
  268. /* xd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
  269. out1 = __SMUSDX(R, C3) >> 16U;
  270. /* yd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
  271. out2 = __SMUAD(C3, R);
  272. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  273. /* writing output(xd', yd') in little endian format */
  274. write_q15x2_ia (&pSi3, __PKHBT( out1, out2, 0 ));
  275. /* Twiddle coefficients index modifier */
  276. ic = ic + twidCoefModifier;
  277. } while (--j);
  278. /* data is in 4.11(q11) format */
  279. /* end of first stage process */
  280. /* start of middle stage process */
  281. /* Twiddle coefficients index modifier */
  282. twidCoefModifier <<= 2U;
  283. /* Calculation of Middle stage */
  284. for (k = fftLen / 4U; k > 4U; k >>= 2U)
  285. {
  286. /* Initializations for the middle stage */
  287. n1 = n2;
  288. n2 >>= 2U;
  289. ic = 0U;
  290. for (j = 0U; j <= (n2 - 1U); j++)
  291. {
  292. /* index calculation for the coefficients */
  293. C1 = read_q15x2 ((q15_t *) pCoef16 + (2U * ic));
  294. C2 = read_q15x2 ((q15_t *) pCoef16 + (4U * ic));
  295. C3 = read_q15x2 ((q15_t *) pCoef16 + (6U * ic));
  296. /* Twiddle coefficients index modifier */
  297. ic = ic + twidCoefModifier;
  298. pSi0 = pSrc16 + 2 * j;
  299. pSi1 = pSi0 + 2 * n2;
  300. pSi2 = pSi1 + 2 * n2;
  301. pSi3 = pSi2 + 2 * n2;
  302. /* Butterfly implementation */
  303. for (i0 = j; i0 < fftLen; i0 += n1)
  304. {
  305. /* Reading i0, i0+fftLen/2 inputs */
  306. /* Read ya (real), xa(imag) input */
  307. T = read_q15x2 (pSi0);
  308. /* Read yc (real), xc(imag) input */
  309. S = read_q15x2 (pSi2);
  310. /* R = packed( (ya + yc), (xa + xc)) */
  311. R = __QADD16(T, S);
  312. /* S = packed((ya - yc), (xa - xc)) */
  313. S = __QSUB16(T, S);
  314. /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
  315. /* Read yb (real), xb(imag) input */
  316. T = read_q15x2 (pSi1);
  317. /* Read yd (real), xd(imag) input */
  318. U = read_q15x2 (pSi3);
  319. /* T = packed( (yb + yd), (xb + xd)) */
  320. T = __QADD16(T, U);
  321. /* writing the butterfly processed i0 sample */
  322. /* xa' = xa + xb + xc + xd */
  323. /* ya' = ya + yb + yc + yd */
  324. out1 = __SHADD16(R, T);
  325. out1 = __SHADD16(out1, 0);
  326. write_q15x2 (pSi0, out1);
  327. pSi0 += 2 * n1;
  328. /* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */
  329. R = __SHSUB16(R, T);
  330. #ifndef ARM_MATH_BIG_ENDIAN
  331. /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
  332. out1 = __SMUAD(C2, R) >> 16U;
  333. /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
  334. out2 = __SMUSDX(C2, R);
  335. #else
  336. /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
  337. out1 = __SMUSDX(R, C2) >> 16U;
  338. /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
  339. out2 = __SMUAD(C2, R);
  340. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  341. /* Reading i0+3fftLen/4 */
  342. /* Read yb (real), xb(imag) input */
  343. T = read_q15x2 (pSi1);
  344. /* writing the butterfly processed i0 + fftLen/4 sample */
  345. /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
  346. /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
  347. write_q15x2 (pSi1, __PKHBT( out1, out2, 0 ));
  348. pSi1 += 2 * n1;
  349. /* Butterfly calculations */
  350. /* Read yd (real), xd(imag) input */
  351. U = read_q15x2 (pSi3);
  352. /* T = packed(yb-yd, xb-xd) */
  353. T = __QSUB16(T, U);
  354. #ifndef ARM_MATH_BIG_ENDIAN
  355. /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
  356. R = __SHASX(S, T);
  357. /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
  358. S = __SHSAX(S, T);
  359. /* Butterfly process for the i0+fftLen/2 sample */
  360. out1 = __SMUAD(C1, S) >> 16U;
  361. out2 = __SMUSDX(C1, S);
  362. #else
  363. /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
  364. R = __SHSAX(S, T);
  365. /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
  366. S = __SHASX(S, T);
  367. /* Butterfly process for the i0+fftLen/2 sample */
  368. out1 = __SMUSDX(S, C1) >> 16U;
  369. out2 = __SMUAD(C1, S);
  370. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  371. /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
  372. /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
  373. write_q15x2 (pSi2, __PKHBT( out1, out2, 0 ));
  374. pSi2 += 2 * n1;
  375. /* Butterfly process for the i0+3fftLen/4 sample */
  376. #ifndef ARM_MATH_BIG_ENDIAN
  377. out1 = __SMUAD(C3, R) >> 16U;
  378. out2 = __SMUSDX(C3, R);
  379. #else
  380. out1 = __SMUSDX(R, C3) >> 16U;
  381. out2 = __SMUAD(C3, R);
  382. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  383. /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
  384. /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
  385. write_q15x2 (pSi3, __PKHBT( out1, out2, 0 ));
  386. pSi3 += 2 * n1;
  387. }
  388. }
  389. /* Twiddle coefficients index modifier */
  390. twidCoefModifier <<= 2U;
  391. }
  392. /* end of middle stage process */
  393. /* data is in 10.6(q6) format for the 1024 point */
  394. /* data is in 8.8(q8) format for the 256 point */
  395. /* data is in 6.10(q10) format for the 64 point */
  396. /* data is in 4.12(q12) format for the 16 point */
  397. /* Initializations for the last stage */
  398. j = fftLen >> 2;
  399. ptr1 = &pSrc16[0];
  400. /* start of last stage process */
  401. /* Butterfly implementation */
  402. do
  403. {
  404. /* Read xa (real), ya(imag) input */
  405. xaya = read_q15x2_ia (&ptr1);
  406. /* Read xb (real), yb(imag) input */
  407. xbyb = read_q15x2_ia (&ptr1);
  408. /* Read xc (real), yc(imag) input */
  409. xcyc = read_q15x2_ia (&ptr1);
  410. /* Read xd (real), yd(imag) input */
  411. xdyd = read_q15x2_ia (&ptr1);
  412. /* R = packed((ya + yc), (xa + xc)) */
  413. R = __QADD16(xaya, xcyc);
  414. /* T = packed((yb + yd), (xb + xd)) */
  415. T = __QADD16(xbyb, xdyd);
  416. /* pointer updation for writing */
  417. ptr1 = ptr1 - 8U;
  418. /* xa' = xa + xb + xc + xd */
  419. /* ya' = ya + yb + yc + yd */
  420. write_q15x2_ia (&ptr1, __SHADD16(R, T));
  421. /* T = packed((yb + yd), (xb + xd)) */
  422. T = __QADD16(xbyb, xdyd);
  423. /* xc' = (xa-xb+xc-xd) */
  424. /* yc' = (ya-yb+yc-yd) */
  425. write_q15x2_ia (&ptr1, __SHSUB16(R, T));
  426. /* S = packed((ya - yc), (xa - xc)) */
  427. S = __QSUB16(xaya, xcyc);
  428. /* Read yd (real), xd(imag) input */
  429. /* T = packed( (yb - yd), (xb - xd)) */
  430. U = __QSUB16(xbyb, xdyd);
  431. #ifndef ARM_MATH_BIG_ENDIAN
  432. /* xb' = (xa+yb-xc-yd) */
  433. /* yb' = (ya-xb-yc+xd) */
  434. write_q15x2_ia (&ptr1, __SHSAX(S, U));
  435. /* xd' = (xa-yb-xc+yd) */
  436. /* yd' = (ya+xb-yc-xd) */
  437. write_q15x2_ia (&ptr1, __SHASX(S, U));
  438. #else
  439. /* xb' = (xa+yb-xc-yd) */
  440. /* yb' = (ya-xb-yc+xd) */
  441. write_q15x2_ia (&ptr1, __SHASX(S, U));
  442. /* xd' = (xa-yb-xc+yd) */
  443. /* yd' = (ya+xb-yc-xd) */
  444. write_q15x2_ia (&ptr1, __SHSAX(S, U));
  445. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  446. } while (--j);
  447. /* end of last stage process */
  448. /* output is in 11.5(q5) format for the 1024 point */
  449. /* output is in 9.7(q7) format for the 256 point */
  450. /* output is in 7.9(q9) format for the 64 point */
  451. /* output is in 5.11(q11) format for the 16 point */
  452. #else /* #if defined (ARM_MATH_DSP) */
  453. q15_t R0, R1, S0, S1, T0, T1, U0, U1;
  454. q15_t Co1, Si1, Co2, Si2, Co3, Si3, out1, out2;
  455. uint32_t n1, n2, ic, i0, i1, i2, i3, j, k;
  456. /* Total process is divided into three stages */
  457. /* process first stage, middle stages, & last stage */
  458. /* Initializations for the first stage */
  459. n2 = fftLen;
  460. n1 = n2;
  461. /* n2 = fftLen/4 */
  462. n2 >>= 2U;
  463. /* Index for twiddle coefficient */
  464. ic = 0U;
  465. /* Index for input read and output write */
  466. i0 = 0U;
  467. j = n2;
  468. /* Input is in 1.15(q15) format */
  469. /* start of first stage process */
  470. do
  471. {
  472. /* Butterfly implementation */
  473. /* index calculation for the input as, */
  474. /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
  475. i1 = i0 + n2;
  476. i2 = i1 + n2;
  477. i3 = i2 + n2;
  478. /* Reading i0, i0+fftLen/2 inputs */
  479. /* input is down scale by 4 to avoid overflow */
  480. /* Read ya (real), xa(imag) input */
  481. T0 = pSrc16[i0 * 2U] >> 2U;
  482. T1 = pSrc16[(i0 * 2U) + 1U] >> 2U;
  483. /* input is down scale by 4 to avoid overflow */
  484. /* Read yc (real), xc(imag) input */
  485. S0 = pSrc16[i2 * 2U] >> 2U;
  486. S1 = pSrc16[(i2 * 2U) + 1U] >> 2U;
  487. /* R0 = (ya + yc) */
  488. R0 = __SSAT(T0 + S0, 16U);
  489. /* R1 = (xa + xc) */
  490. R1 = __SSAT(T1 + S1, 16U);
  491. /* S0 = (ya - yc) */
  492. S0 = __SSAT(T0 - S0, 16);
  493. /* S1 = (xa - xc) */
  494. S1 = __SSAT(T1 - S1, 16);
  495. /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
  496. /* input is down scale by 4 to avoid overflow */
  497. /* Read yb (real), xb(imag) input */
  498. T0 = pSrc16[i1 * 2U] >> 2U;
  499. T1 = pSrc16[(i1 * 2U) + 1U] >> 2U;
  500. /* input is down scale by 4 to avoid overflow */
  501. /* Read yd (real), xd(imag) input */
  502. U0 = pSrc16[i3 * 2U] >> 2U;
  503. U1 = pSrc16[(i3 * 2U) + 1] >> 2U;
  504. /* T0 = (yb + yd) */
  505. T0 = __SSAT(T0 + U0, 16U);
  506. /* T1 = (xb + xd) */
  507. T1 = __SSAT(T1 + U1, 16U);
  508. /* writing the butterfly processed i0 sample */
  509. /* ya' = ya + yb + yc + yd */
  510. /* xa' = xa + xb + xc + xd */
  511. pSrc16[i0 * 2U] = (R0 >> 1U) + (T0 >> 1U);
  512. pSrc16[(i0 * 2U) + 1U] = (R1 >> 1U) + (T1 >> 1U);
  513. /* R0 = (ya + yc) - (yb + yd) */
  514. /* R1 = (xa + xc) - (xb + xd) */
  515. R0 = __SSAT(R0 - T0, 16U);
  516. R1 = __SSAT(R1 - T1, 16U);
  517. /* co2 & si2 are read from Coefficient pointer */
  518. Co2 = pCoef16[2U * ic * 2U];
  519. Si2 = pCoef16[(2U * ic * 2U) + 1];
  520. /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
  521. out1 = (q15_t) ((Co2 * R0 + Si2 * R1) >> 16U);
  522. /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
  523. out2 = (q15_t) ((-Si2 * R0 + Co2 * R1) >> 16U);
  524. /* Reading i0+fftLen/4 */
  525. /* input is down scale by 4 to avoid overflow */
  526. /* T0 = yb, T1 = xb */
  527. T0 = pSrc16[i1 * 2U] >> 2;
  528. T1 = pSrc16[(i1 * 2U) + 1] >> 2;
  529. /* writing the butterfly processed i0 + fftLen/4 sample */
  530. /* writing output(xc', yc') in little endian format */
  531. pSrc16[i1 * 2U] = out1;
  532. pSrc16[(i1 * 2U) + 1] = out2;
  533. /* Butterfly calculations */
  534. /* input is down scale by 4 to avoid overflow */
  535. /* U0 = yd, U1 = xd */
  536. U0 = pSrc16[i3 * 2U] >> 2;
  537. U1 = pSrc16[(i3 * 2U) + 1] >> 2;
  538. /* T0 = yb-yd */
  539. T0 = __SSAT(T0 - U0, 16);
  540. /* T1 = xb-xd */
  541. T1 = __SSAT(T1 - U1, 16);
  542. /* R1 = (ya-yc) + (xb- xd), R0 = (xa-xc) - (yb-yd)) */
  543. R0 = (q15_t) __SSAT((q31_t) (S0 - T1), 16);
  544. R1 = (q15_t) __SSAT((q31_t) (S1 + T0), 16);
  545. /* S1 = (ya-yc) - (xb- xd), S0 = (xa-xc) + (yb-yd)) */
  546. S0 = (q15_t) __SSAT(((q31_t) S0 + T1), 16U);
  547. S1 = (q15_t) __SSAT(((q31_t) S1 - T0), 16U);
  548. /* co1 & si1 are read from Coefficient pointer */
  549. Co1 = pCoef16[ic * 2U];
  550. Si1 = pCoef16[(ic * 2U) + 1];
  551. /* Butterfly process for the i0+fftLen/2 sample */
  552. /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
  553. out1 = (q15_t) ((Si1 * S1 + Co1 * S0) >> 16);
  554. /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
  555. out2 = (q15_t) ((-Si1 * S0 + Co1 * S1) >> 16);
  556. /* writing output(xb', yb') in little endian format */
  557. pSrc16[i2 * 2U] = out1;
  558. pSrc16[(i2 * 2U) + 1] = out2;
  559. /* Co3 & si3 are read from Coefficient pointer */
  560. Co3 = pCoef16[3U * (ic * 2U)];
  561. Si3 = pCoef16[(3U * (ic * 2U)) + 1];
  562. /* Butterfly process for the i0+3fftLen/4 sample */
  563. /* xd' = (xa-yb-xc+yd)* Co3 + (ya+xb-yc-xd)* (si3) */
  564. out1 = (q15_t) ((Si3 * R1 + Co3 * R0) >> 16U);
  565. /* yd' = (ya+xb-yc-xd)* Co3 - (xa-yb-xc+yd)* (si3) */
  566. out2 = (q15_t) ((-Si3 * R0 + Co3 * R1) >> 16U);
  567. /* writing output(xd', yd') in little endian format */
  568. pSrc16[i3 * 2U] = out1;
  569. pSrc16[(i3 * 2U) + 1] = out2;
  570. /* Twiddle coefficients index modifier */
  571. ic = ic + twidCoefModifier;
  572. /* Updating input index */
  573. i0 = i0 + 1U;
  574. } while (--j);
  575. /* data is in 4.11(q11) format */
  576. /* end of first stage process */
  577. /* start of middle stage process */
  578. /* Twiddle coefficients index modifier */
  579. twidCoefModifier <<= 2U;
  580. /* Calculation of Middle stage */
  581. for (k = fftLen / 4U; k > 4U; k >>= 2U)
  582. {
  583. /* Initializations for the middle stage */
  584. n1 = n2;
  585. n2 >>= 2U;
  586. ic = 0U;
  587. for (j = 0U; j <= (n2 - 1U); j++)
  588. {
  589. /* index calculation for the coefficients */
  590. Co1 = pCoef16[ic * 2U];
  591. Si1 = pCoef16[(ic * 2U) + 1U];
  592. Co2 = pCoef16[2U * (ic * 2U)];
  593. Si2 = pCoef16[(2U * (ic * 2U)) + 1U];
  594. Co3 = pCoef16[3U * (ic * 2U)];
  595. Si3 = pCoef16[(3U * (ic * 2U)) + 1U];
  596. /* Twiddle coefficients index modifier */
  597. ic = ic + twidCoefModifier;
  598. /* Butterfly implementation */
  599. for (i0 = j; i0 < fftLen; i0 += n1)
  600. {
  601. /* index calculation for the input as, */
  602. /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
  603. i1 = i0 + n2;
  604. i2 = i1 + n2;
  605. i3 = i2 + n2;
  606. /* Reading i0, i0+fftLen/2 inputs */
  607. /* Read ya (real), xa(imag) input */
  608. T0 = pSrc16[i0 * 2U];
  609. T1 = pSrc16[(i0 * 2U) + 1U];
  610. /* Read yc (real), xc(imag) input */
  611. S0 = pSrc16[i2 * 2U];
  612. S1 = pSrc16[(i2 * 2U) + 1U];
  613. /* R0 = (ya + yc), R1 = (xa + xc) */
  614. R0 = __SSAT(T0 + S0, 16);
  615. R1 = __SSAT(T1 + S1, 16);
  616. /* S0 = (ya - yc), S1 =(xa - xc) */
  617. S0 = __SSAT(T0 - S0, 16);
  618. S1 = __SSAT(T1 - S1, 16);
  619. /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
  620. /* Read yb (real), xb(imag) input */
  621. T0 = pSrc16[i1 * 2U];
  622. T1 = pSrc16[(i1 * 2U) + 1U];
  623. /* Read yd (real), xd(imag) input */
  624. U0 = pSrc16[i3 * 2U];
  625. U1 = pSrc16[(i3 * 2U) + 1U];
  626. /* T0 = (yb + yd), T1 = (xb + xd) */
  627. T0 = __SSAT(T0 + U0, 16);
  628. T1 = __SSAT(T1 + U1, 16);
  629. /* writing the butterfly processed i0 sample */
  630. /* xa' = xa + xb + xc + xd */
  631. /* ya' = ya + yb + yc + yd */
  632. out1 = ((R0 >> 1U) + (T0 >> 1U)) >> 1U;
  633. out2 = ((R1 >> 1U) + (T1 >> 1U)) >> 1U;
  634. pSrc16[i0 * 2U] = out1;
  635. pSrc16[(2U * i0) + 1U] = out2;
  636. /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */
  637. R0 = (R0 >> 1U) - (T0 >> 1U);
  638. R1 = (R1 >> 1U) - (T1 >> 1U);
  639. /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
  640. out1 = (q15_t) ((Co2 * R0 + Si2 * R1) >> 16U);
  641. /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
  642. out2 = (q15_t) ((-Si2 * R0 + Co2 * R1) >> 16U);
  643. /* Reading i0+3fftLen/4 */
  644. /* Read yb (real), xb(imag) input */
  645. T0 = pSrc16[i1 * 2U];
  646. T1 = pSrc16[(i1 * 2U) + 1U];
  647. /* writing the butterfly processed i0 + fftLen/4 sample */
  648. /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
  649. /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
  650. pSrc16[i1 * 2U] = out1;
  651. pSrc16[(i1 * 2U) + 1U] = out2;
  652. /* Butterfly calculations */
  653. /* Read yd (real), xd(imag) input */
  654. U0 = pSrc16[i3 * 2U];
  655. U1 = pSrc16[(i3 * 2U) + 1U];
  656. /* T0 = yb-yd, T1 = xb-xd */
  657. T0 = __SSAT(T0 - U0, 16);
  658. T1 = __SSAT(T1 - U1, 16);
  659. /* R0 = (ya-yc) + (xb- xd), R1 = (xa-xc) - (yb-yd)) */
  660. R0 = (S0 >> 1U) - (T1 >> 1U);
  661. R1 = (S1 >> 1U) + (T0 >> 1U);
  662. /* S0 = (ya-yc) - (xb- xd), S1 = (xa-xc) + (yb-yd)) */
  663. S0 = (S0 >> 1U) + (T1 >> 1U);
  664. S1 = (S1 >> 1U) - (T0 >> 1U);
  665. /* Butterfly process for the i0+fftLen/2 sample */
  666. out1 = (q15_t) ((Co1 * S0 + Si1 * S1) >> 16U);
  667. out2 = (q15_t) ((-Si1 * S0 + Co1 * S1) >> 16U);
  668. /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
  669. /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
  670. pSrc16[i2 * 2U] = out1;
  671. pSrc16[(i2 * 2U) + 1U] = out2;
  672. /* Butterfly process for the i0+3fftLen/4 sample */
  673. out1 = (q15_t) ((Si3 * R1 + Co3 * R0) >> 16U);
  674. out2 = (q15_t) ((-Si3 * R0 + Co3 * R1) >> 16U);
  675. /* xd' = (xa-yb-xc+yd)* Co3 + (ya+xb-yc-xd)* (si3) */
  676. /* yd' = (ya+xb-yc-xd)* Co3 - (xa-yb-xc+yd)* (si3) */
  677. pSrc16[i3 * 2U] = out1;
  678. pSrc16[(i3 * 2U) + 1U] = out2;
  679. }
  680. }
  681. /* Twiddle coefficients index modifier */
  682. twidCoefModifier <<= 2U;
  683. }
  684. /* end of middle stage process */
  685. /* data is in 10.6(q6) format for the 1024 point */
  686. /* data is in 8.8(q8) format for the 256 point */
  687. /* data is in 6.10(q10) format for the 64 point */
  688. /* data is in 4.12(q12) format for the 16 point */
  689. /* Initializations for the last stage */
  690. n1 = n2;
  691. n2 >>= 2U;
  692. /* start of last stage process */
  693. /* Butterfly implementation */
  694. for (i0 = 0U; i0 <= (fftLen - n1); i0 += n1)
  695. {
  696. /* index calculation for the input as, */
  697. /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
  698. i1 = i0 + n2;
  699. i2 = i1 + n2;
  700. i3 = i2 + n2;
  701. /* Reading i0, i0+fftLen/2 inputs */
  702. /* Read ya (real), xa(imag) input */
  703. T0 = pSrc16[i0 * 2U];
  704. T1 = pSrc16[(i0 * 2U) + 1U];
  705. /* Read yc (real), xc(imag) input */
  706. S0 = pSrc16[i2 * 2U];
  707. S1 = pSrc16[(i2 * 2U) + 1U];
  708. /* R0 = (ya + yc), R1 = (xa + xc) */
  709. R0 = __SSAT(T0 + S0, 16U);
  710. R1 = __SSAT(T1 + S1, 16U);
  711. /* S0 = (ya - yc), S1 = (xa - xc) */
  712. S0 = __SSAT(T0 - S0, 16U);
  713. S1 = __SSAT(T1 - S1, 16U);
  714. /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
  715. /* Read yb (real), xb(imag) input */
  716. T0 = pSrc16[i1 * 2U];
  717. T1 = pSrc16[(i1 * 2U) + 1U];
  718. /* Read yd (real), xd(imag) input */
  719. U0 = pSrc16[i3 * 2U];
  720. U1 = pSrc16[(i3 * 2U) + 1U];
  721. /* T0 = (yb + yd), T1 = (xb + xd)) */
  722. T0 = __SSAT(T0 + U0, 16U);
  723. T1 = __SSAT(T1 + U1, 16U);
  724. /* writing the butterfly processed i0 sample */
  725. /* xa' = xa + xb + xc + xd */
  726. /* ya' = ya + yb + yc + yd */
  727. pSrc16[i0 * 2U] = (R0 >> 1U) + (T0 >> 1U);
  728. pSrc16[(i0 * 2U) + 1U] = (R1 >> 1U) + (T1 >> 1U);
  729. /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */
  730. R0 = (R0 >> 1U) - (T0 >> 1U);
  731. R1 = (R1 >> 1U) - (T1 >> 1U);
  732. /* Read yb (real), xb(imag) input */
  733. T0 = pSrc16[i1 * 2U];
  734. T1 = pSrc16[(i1 * 2U) + 1U];
  735. /* writing the butterfly processed i0 + fftLen/4 sample */
  736. /* xc' = (xa-xb+xc-xd) */
  737. /* yc' = (ya-yb+yc-yd) */
  738. pSrc16[i1 * 2U] = R0;
  739. pSrc16[(i1 * 2U) + 1U] = R1;
  740. /* Read yd (real), xd(imag) input */
  741. U0 = pSrc16[i3 * 2U];
  742. U1 = pSrc16[(i3 * 2U) + 1U];
  743. /* T0 = (yb - yd), T1 = (xb - xd) */
  744. T0 = __SSAT(T0 - U0, 16U);
  745. T1 = __SSAT(T1 - U1, 16U);
  746. /* writing the butterfly processed i0 + fftLen/2 sample */
  747. /* xb' = (xa+yb-xc-yd) */
  748. /* yb' = (ya-xb-yc+xd) */
  749. pSrc16[i2 * 2U] = (S0 >> 1U) + (T1 >> 1U);
  750. pSrc16[(i2 * 2U) + 1U] = (S1 >> 1U) - (T0 >> 1U);
  751. /* writing the butterfly processed i0 + 3fftLen/4 sample */
  752. /* xd' = (xa-yb-xc+yd) */
  753. /* yd' = (ya+xb-yc-xd) */
  754. pSrc16[i3 * 2U] = (S0 >> 1U) - (T1 >> 1U);
  755. pSrc16[(i3 * 2U) + 1U] = (S1 >> 1U) + (T0 >> 1U);
  756. }
  757. /* end of last stage process */
  758. /* output is in 11.5(q5) format for the 1024 point */
  759. /* output is in 9.7(q7) format for the 256 point */
  760. /* output is in 7.9(q9) format for the 64 point */
  761. /* output is in 5.11(q11) format for the 16 point */
  762. #endif /* #if defined (ARM_MATH_DSP) */
  763. }
  764. /**
  765. @brief Core function for the Q15 CIFFT butterfly process.
  766. @param[in,out] pSrc16 points to the in-place buffer of Q15 data type
  767. @param[in] fftLen length of the FFT
  768. @param[in] pCoef16 points to twiddle coefficient buffer
  769. @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
  770. */
  771. /*
  772. * Radix-4 IFFT algorithm used is :
  773. *
  774. * CIFFT uses same twiddle coefficients as CFFT function
  775. * x[k] = x[n] + (j)^k * x[n + fftLen/4] + (-1)^k * x[n+fftLen/2] + (-j)^k * x[n+3*fftLen/4]
  776. *
  777. *
  778. * IFFT is implemented with following changes in equations from FFT
  779. *
  780. * Input real and imaginary data:
  781. * x(n) = xa + j * ya
  782. * x(n+N/4 ) = xb + j * yb
  783. * x(n+N/2 ) = xc + j * yc
  784. * x(n+3N/4) = xd + j * yd
  785. *
  786. *
  787. * Output real and imaginary data:
  788. * x(4r) = xa'+ j * ya'
  789. * x(4r+1) = xb'+ j * yb'
  790. * x(4r+2) = xc'+ j * yc'
  791. * x(4r+3) = xd'+ j * yd'
  792. *
  793. *
  794. * Twiddle factors for radix-4 IFFT:
  795. * Wn = co1 + j * (si1)
  796. * W2n = co2 + j * (si2)
  797. * W3n = co3 + j * (si3)
  798. * The real and imaginary output values for the radix-4 butterfly are
  799. * xa' = xa + xb + xc + xd
  800. * ya' = ya + yb + yc + yd
  801. * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1)
  802. * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1)
  803. * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2)
  804. * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2)
  805. * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3)
  806. * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3)
  807. *
  808. */
  809. void arm_radix4_butterfly_inverse_q15(
  810. q15_t * pSrc16, /* in/out: interleaved complex samples (even index = x/real, odd index = y/imag), overwritten in place */
  811. uint32_t fftLen, /* number of complex points held in pSrc16 */
  812. const q15_t * pCoef16, /* twiddle table stored as interleaved (Co, Si) pairs */
  813. uint32_t twidCoefModifier) /* step added to the twiddle index ic; multiplied by 4 after every stage */
  814. { /* In-place radix-4 inverse butterflies: a first stage, zero or more middle stages, and a twiddle-free last stage. */
  815. #if defined (ARM_MATH_DSP) /* packed path: every complex sample is processed as one 32-bit word holding two q15 halfwords */
  816. q31_t R, S, T, U; /* NOTE(review): halfword-level formulas in this branch assume the documented ACLE/CMSIS behaviour of the __Qxxx16, __SHxxx, __SMUxx and __PKHBT intrinsics and of read_q15x2 -- confirm against the CMSIS headers */
  817. q31_t C1, C2, C3, out1, out2; /* C1..C3: packed (Co, Si) twiddles; out1/out2: 32-bit products before packing */
  818. uint32_t n1, n2, ic, i0, j, k; /* n1: butterfly group span, n2: quarter span, ic: twiddle index */
  819. q15_t *ptr1; /* sequential read/write cursor used by the last stage */
  820. q15_t *pSi0; /* cursors to the four inputs of the current butterfly */
  821. q15_t *pSi1;
  822. q15_t *pSi2;
  823. q15_t *pSi3;
  824. q31_t xaya, xbyb, xcyc, xdyd; /* the four packed inputs of a last-stage butterfly */
  825. /* Total process is divided into three stages */
  826. /* process first stage, middle stages, & last stage */
  827. /* Initializations for the first stage */
  828. n2 = fftLen;
  829. n1 = n2;
  830. /* n2 = fftLen/4 */
  831. n2 >>= 2U;
  832. /* Index for twiddle coefficient */
  833. ic = 0U;
  834. /* j counts the fftLen/4 first-stage butterflies; pSi0..pSi3 point at the start of the four quarters of the buffer */
  835. j = n2;
  836. pSi0 = pSrc16;
  837. pSi1 = pSi0 + 2 * n2;
  838. pSi2 = pSi1 + 2 * n2;
  839. pSi3 = pSi2 + 2 * n2;
  840. /* Input is in 1.15(q15) format */
  841. /* start of first stage process */
  842. do
  843. {
  844. /* Butterfly implementation */
  845. /* Reading i0, i0+fftLen/2 inputs */
  846. /* Read xa (real), ya (imag) input as one packed word */
  847. T = read_q15x2 (pSi0);
  848. T = __SHADD16(T, 0); /* two halving adds with 0 scale both halfwords down by 4, mirroring the >> 2 of the scalar path */
  849. T = __SHADD16(T, 0);
  850. /* Read xc (real), yc (imag) input, scaled down by 4 the same way */
  851. S = read_q15x2 (pSi2);
  852. S = __SHADD16(S, 0);
  853. S = __SHADD16(S, 0);
  854. /* R = packed((ya + yc), (xa + xc) ) */
  855. R = __QADD16(T, S);
  856. /* S = packed((ya - yc), (xa - xc) ) */
  857. S = __QSUB16(T, S);
  858. /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
  859. /* Read xb (real), yb (imag) input, scaled down by 4 */
  860. T = read_q15x2 (pSi1);
  861. T = __SHADD16(T, 0);
  862. T = __SHADD16(T, 0);
  863. /* Read xd (real), yd (imag) input, scaled down by 4 */
  864. U = read_q15x2 (pSi3);
  865. U = __SHADD16(U, 0);
  866. U = __SHADD16(U, 0);
  867. /* T = packed((yb + yd), (xb + xd) ) */
  868. T = __QADD16(T, U);
  869. /* writing the butterfly processed i0 sample (halving add, so the stored sum is divided by 2) */
  870. /* xa' = xa + xb + xc + xd */
  871. /* ya' = ya + yb + yc + yd */
  872. write_q15x2_ia (&pSi0, __SHADD16(R, T));
  873. /* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */
  874. R = __QSUB16(R, T);
  875. /* co2 & si2 are read from SIMD Coefficient pointer (q15 offset 4*ic, i.e. coefficient pair 2*ic) */
  876. C2 = read_q15x2 ((q15_t *) pCoef16 + (4U * ic));
  877. #ifndef ARM_MATH_BIG_ENDIAN
  878. /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
  879. out1 = __SMUSD(C2, R) >> 16U;
  880. /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2); left unshifted because __PKHBT below keeps its upper halfword */
  881. out2 = __SMUADX(C2, R);
  882. #else
  883. /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
  884. out1 = __SMUADX(C2, R) >> 16U;
  885. /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
  886. out2 = __SMUSD(__QSUB16(0, C2), R);
  887. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  888. /* Reading i0+fftLen/4 again: T was overwritten with the (b + d) sum and pSi1 has not been written yet */
  889. /* T = packed(yb, xb) */
  890. T = read_q15x2 (pSi1);
  891. T = __SHADD16(T, 0);
  892. T = __SHADD16(T, 0);
  893. /* writing the butterfly processed i0 + fftLen/4 sample */
  894. /* writing output(xc', yc'): low halfword taken from out1, high halfword taken from out2 */
  895. write_q15x2_ia (&pSi1, (q31_t) __PKHBT( out1, out2, 0 ));
  896. /* Butterfly calculations */
  897. /* U = packed(yd, xd), re-read and re-scaled before pSi3 is overwritten */
  898. U = read_q15x2 (pSi3);
  899. U = __SHADD16(U, 0);
  900. U = __SHADD16(U, 0);
  901. /* T = packed(yb-yd, xb-xd) */
  902. T = __QSUB16(T, U);
  903. #ifndef ARM_MATH_BIG_ENDIAN
  904. /* R = packed((ya-yc) - (xb- xd) , (xa-xc) + (yb-yd)) */
  905. R = __QSAX(S, T);
  906. /* S = packed((ya-yc) + (xb- xd), (xa-xc) - (yb-yd)) */
  907. S = __QASX(S, T);
  908. #else
  909. /* R holds the same logical values as above: y-part (ya-yc) - (xb- xd), x-part (xa-xc) + (yb-yd) */
  910. R = __QASX(S, T);
  911. /* S holds the same logical values as above: y-part (ya-yc) + (xb- xd), x-part (xa-xc) - (yb-yd) */
  912. S = __QSAX(S, T);
  913. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  914. /* co1 & si1 are read from SIMD Coefficient pointer (q15 offset 2*ic, i.e. coefficient pair ic) */
  915. C1 = read_q15x2 ((q15_t *) pCoef16 + (2U * ic));
  916. /* Butterfly process for the i0+fftLen/2 sample */
  917. #ifndef ARM_MATH_BIG_ENDIAN
  918. /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */
  919. out1 = __SMUSD(C1, S) >> 16U;
  920. /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */
  921. out2 = __SMUADX(C1, S);
  922. #else
  923. /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */
  924. out1 = __SMUADX(C1, S) >> 16U;
  925. /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */
  926. out2 = __SMUSD(__QSUB16(0, C1), S);
  927. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  928. /* writing output(xb', yb') to the i0 + fftLen/2 slot */
  929. write_q15x2_ia (&pSi2, __PKHBT( out1, out2, 0 ));
  930. /* co3 & si3 are read from SIMD Coefficient pointer (q15 offset 6*ic, i.e. coefficient pair 3*ic) */
  931. C3 = read_q15x2 ((q15_t *) pCoef16 + (6U * ic));
  932. /* Butterfly process for the i0+3fftLen/4 sample */
  933. #ifndef ARM_MATH_BIG_ENDIAN
  934. /* xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3) */
  935. out1 = __SMUSD(C3, R) >> 16U;
  936. /* yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3) */
  937. out2 = __SMUADX(C3, R);
  938. #else
  939. /* yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3) */
  940. out1 = __SMUADX(C3, R) >> 16U;
  941. /* xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3) */
  942. out2 = __SMUSD(__QSUB16(0, C3), R);
  943. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  944. /* writing output(xd', yd') to the i0 + 3fftLen/4 slot */
  945. write_q15x2_ia (&pSi3, __PKHBT( out1, out2, 0 ));
  946. /* Twiddle coefficients index modifier */
  947. ic = ic + twidCoefModifier;
  948. } while (--j); /* NOTE(review): assumes fftLen >= 4; with n2 == 0 the unsigned --j would wrap instead of ending the loop */
  949. /* data is in 4.11(q11) format */
  950. /* end of first stage process */
  951. /* start of middle stage process */
  952. /* Twiddle coefficients index modifier */
  953. twidCoefModifier <<= 2U;
  954. /* Calculation of Middle stage: one pass of this loop per middle stage, k is only the stage counter */
  955. for (k = fftLen / 4U; k > 4U; k >>= 2U)
  956. {
  957. /* Initializations for the middle stage: n1 = span of one butterfly group, n2 = n1/4 = distance between its inputs */
  958. n1 = n2;
  959. n2 >>= 2U;
  960. ic = 0U;
  961. for (j = 0U; j <= (n2 - 1U); j++)
  962. {
  963. /* the three twiddles depend only on j, so they are loaded once per j and reused for every group */
  964. C1 = read_q15x2 ((q15_t *) pCoef16 + (2U * ic));
  965. C2 = read_q15x2 ((q15_t *) pCoef16 + (4U * ic));
  966. C3 = read_q15x2 ((q15_t *) pCoef16 + (6U * ic));
  967. /* Twiddle coefficients index modifier */
  968. ic = ic + twidCoefModifier;
  969. pSi0 = pSrc16 + 2 * j; /* inputs of the first group: complex indices j, j + n2, j + 2*n2, j + 3*n2 */
  970. pSi1 = pSi0 + 2 * n2;
  971. pSi2 = pSi1 + 2 * n2;
  972. pSi3 = pSi2 + 2 * n2;
  973. /* Butterfly implementation: i0 only counts the groups, the data is addressed through pSi0..pSi3 */
  974. for (i0 = j; i0 < fftLen; i0 += n1)
  975. {
  976. /* Reading i0, i0 + 2*n2 inputs */
  977. /* Read xa (real), ya (imag) input */
  978. T = read_q15x2 (pSi0);
  979. /* Read xc (real), yc (imag) input */
  980. S = read_q15x2 (pSi2);
  981. /* R = packed( (ya + yc), (xa + xc)) */
  982. R = __QADD16(T, S);
  983. /* S = packed((ya - yc), (xa - xc)) */
  984. S = __QSUB16(T, S);
  985. /* Reading i0 + n2 , i0 + 3*n2 inputs */
  986. /* Read xb (real), yb (imag) input */
  987. T = read_q15x2 (pSi1);
  988. /* Read xd (real), yd (imag) input */
  989. U = read_q15x2 (pSi3);
  990. /* T = packed( (yb + yd), (xb + xd)) */
  991. T = __QADD16(T, U);
  992. /* writing the butterfly processed i0 sample; the two halving adds divide the sum by 4 */
  993. /* xa' = xa + xb + xc + xd */
  994. /* ya' = ya + yb + yc + yd */
  995. out1 = __SHADD16(R, T);
  996. out1 = __SHADD16(out1, 0);
  997. write_q15x2 (pSi0, out1);
  998. pSi0 += 2 * n1; /* advance to the same input of the next group */
  999. /* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)), halved by the halving subtract */
  1000. R = __SHSUB16(R, T);
  1001. #ifndef ARM_MATH_BIG_ENDIAN
  1002. /* (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
  1003. out1 = __SMUSD(C2, R) >> 16U;
  1004. /* (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
  1005. out2 = __SMUADX(C2, R);
  1006. #else
  1007. /* (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
  1008. out1 = __SMUADX(R, C2) >> 16U;
  1009. /* (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
  1010. out2 = __SMUSD(__QSUB16(0, C2), R);
  1011. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  1012. /* Reading i0 + n2 again: T was overwritten with the (b + d) sum and pSi1 has not been written yet */
  1013. /* Read xb (real), yb (imag) input */
  1014. T = read_q15x2 (pSi1);
  1015. /* writing the butterfly processed i0 + n2 sample */
  1016. /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
  1017. /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
  1018. write_q15x2 (pSi1, __PKHBT( out1, out2, 0 ));
  1019. pSi1 += 2 * n1;
  1020. /* Butterfly calculations */
  1021. /* Read xd (real), yd (imag) input */
  1022. U = read_q15x2 (pSi3);
  1023. /* T = packed(yb-yd, xb-xd) */
  1024. T = __QSUB16(T, U);
  1025. #ifndef ARM_MATH_BIG_ENDIAN
  1026. /* R = packed((ya-yc) - (xb- xd) , (xa-xc) + (yb-yd)), halved */
  1027. R = __SHSAX(S, T);
  1028. /* S = packed((ya-yc) + (xb- xd), (xa-xc) - (yb-yd)), halved */
  1029. S = __SHASX(S, T);
  1030. /* Butterfly process for the i0 + 2*n2 sample */
  1031. out1 = __SMUSD(C1, S) >> 16U;
  1032. out2 = __SMUADX(C1, S);
  1033. #else
  1034. /* R holds the same logical values as above: y-part (ya-yc) - (xb- xd), x-part (xa-xc) + (yb-yd), halved */
  1035. R = __SHASX(S, T);
  1036. /* S holds the same logical values as above: y-part (ya-yc) + (xb- xd), x-part (xa-xc) - (yb-yd), halved */
  1037. S = __SHSAX(S, T);
  1038. /* Butterfly process for the i0 + 2*n2 sample */
  1039. out1 = __SMUADX(S, C1) >> 16U;
  1040. out2 = __SMUSD(__QSUB16(0, C1), S);
  1041. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  1042. /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */
  1043. /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */
  1044. write_q15x2 (pSi2, __PKHBT( out1, out2, 0 ));
  1045. pSi2 += 2 * n1;
  1046. /* Butterfly process for the i0 + 3*n2 sample */
  1047. #ifndef ARM_MATH_BIG_ENDIAN
  1048. out1 = __SMUSD(C3, R) >> 16U;
  1049. out2 = __SMUADX(C3, R);
  1050. #else
  1051. out1 = __SMUADX(C3, R) >> 16U;
  1052. out2 = __SMUSD(__QSUB16(0, C3), R);
  1053. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  1054. /* xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3) */
  1055. /* yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3) */
  1056. write_q15x2 (pSi3, __PKHBT( out1, out2, 0 ));
  1057. pSi3 += 2 * n1;
  1058. }
  1059. }
  1060. /* Twiddle coefficients index modifier */
  1061. twidCoefModifier <<= 2U;
  1062. }
  1063. /* end of middle stage process */
  1064. /* data is in 10.6(q6) format for the 1024 point */
  1065. /* data is in 8.8(q8) format for the 256 point */
  1066. /* data is in 6.10(q10) format for the 64 point */
  1067. /* data is in 4.12(q12) format for the 16 point */
  1068. /* Initializations for the last stage: fftLen/4 butterflies over four consecutive complex samples each, no twiddles */
  1069. j = fftLen >> 2;
  1070. ptr1 = &pSrc16[0];
  1071. /* start of last stage process */
  1072. /* Butterfly implementation */
  1073. do
  1074. {
  1075. /* Read xa (real), ya(imag) input */
  1076. xaya = read_q15x2_ia (&ptr1);
  1077. /* Read xb (real), yb(imag) input */
  1078. xbyb = read_q15x2_ia (&ptr1);
  1079. /* Read xc (real), yc(imag) input */
  1080. xcyc = read_q15x2_ia (&ptr1);
  1081. /* Read xd (real), yd(imag) input */
  1082. xdyd = read_q15x2_ia (&ptr1);
  1083. /* R = packed((ya + yc), (xa + xc)) */
  1084. R = __QADD16(xaya, xcyc);
  1085. /* T = packed((yb + yd), (xb + xd)) */
  1086. T = __QADD16(xbyb, xdyd);
  1087. /* rewind by the 8 q15 values just read so the four results overwrite the four inputs */
  1088. ptr1 = ptr1 - 8U;
  1089. /* xa' = xa + xb + xc + xd */
  1090. /* ya' = ya + yb + yc + yd */
  1091. write_q15x2_ia (&ptr1, __SHADD16(R, T));
  1092. /* T = packed((yb + yd), (xb + xd)); recomputed from the unchanged inputs, same value as above */
  1093. T = __QADD16(xbyb, xdyd);
  1094. /* xc' = (xa-xb+xc-xd) */
  1095. /* yc' = (ya-yb+yc-yd) */
  1096. write_q15x2_ia (&ptr1, __SHSUB16(R, T));
  1097. /* S = packed((ya - yc), (xa - xc)) */
  1098. S = __QSUB16(xaya, xcyc);
  1099. /* inputs were already loaded into xbyb / xdyd, no re-read is needed here */
  1100. /* U = packed( (yb - yd), (xb - xd)) */
  1101. U = __QSUB16(xbyb, xdyd);
  1102. #ifndef ARM_MATH_BIG_ENDIAN
  1103. /* xb' = (xa-yb-xc+yd) */
  1104. /* yb' = (ya+xb-yc-xd) */
  1105. write_q15x2_ia (&ptr1, __SHASX(S, U));
  1106. /* xd' = (xa+yb-xc-yd) */
  1107. /* yd' = (ya-xb-yc+xd) */
  1108. write_q15x2_ia (&ptr1, __SHSAX(S, U));
  1109. #else
  1110. /* xb' = (xa-yb-xc+yd) */
  1111. /* yb' = (ya+xb-yc-xd) */
  1112. write_q15x2_ia (&ptr1, __SHSAX(S, U));
  1113. /* xd' = (xa+yb-xc-yd) */
  1114. /* yd' = (ya-xb-yc+xd) */
  1115. write_q15x2_ia (&ptr1, __SHASX(S, U));
  1116. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  1117. } while (--j); /* NOTE(review): same fftLen >= 4 assumption as the first-stage loop */
  1118. /* end of last stage process */
  1119. /* output is in 11.5(q5) format for the 1024 point */
  1120. /* output is in 9.7(q7) format for the 256 point */
  1121. /* output is in 7.9(q9) format for the 64 point */
  1122. /* output is in 5.11(q11) format for the 16 point */
  1123. #else /* scalar path of arm_radix4_butterfly_inverse_q15, used when ARM_MATH_DSP is not defined */
  1124. q15_t R0, R1, S0, S1, T0, T1, U0, U1; /* suffix 0 = value taken from the even (x) index, suffix 1 = value taken from the odd (y) index */
  1125. q15_t Co1, Si1, Co2, Si2, Co3, Si3, out1, out2;
  1126. uint32_t n1, n2, ic, i0, i1, i2, i3, j, k; /* i0..i3: complex indices of the four butterfly inputs */
  1127. /* Total process is divided into three stages */
  1128. /* process first stage, middle stages, & last stage */
  1129. /* Initializations for the first stage */
  1130. n2 = fftLen;
  1131. n1 = n2;
  1132. /* n2 = fftLen/4 */
  1133. n2 >>= 2U;
  1134. /* Index for twiddle coefficient */
  1135. ic = 0U;
  1136. /* Index for input read and output write; j counts the fftLen/4 first-stage butterflies */
  1137. i0 = 0U;
  1138. j = n2;
  1139. /* Input is in 1.15(q15) format */
  1140. /* Start of first stage process */
  1141. do
  1142. {
  1143. /* Butterfly implementation */
  1144. /* index calculation for the input as, */
  1145. /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
  1146. i1 = i0 + n2;
  1147. i2 = i1 + n2;
  1148. i3 = i2 + n2;
  1149. /* Reading i0, i0+fftLen/2 inputs */
  1150. /* input is down scale by 4 to avoid overflow */
  1151. /* Read xa (real), ya (imag) input */
  1152. T0 = pSrc16[i0 * 2U] >> 2U;
  1153. T1 = pSrc16[(i0 * 2U) + 1U] >> 2U;
  1154. /* input is down scale by 4 to avoid overflow */
  1155. /* Read xc (real), yc (imag) input */
  1156. S0 = pSrc16[i2 * 2U] >> 2U;
  1157. S1 = pSrc16[(i2 * 2U) + 1U] >> 2U;
  1158. /* R0 = (xa + xc), R1 = (ya + yc) */
  1159. R0 = __SSAT(T0 + S0, 16U);
  1160. R1 = __SSAT(T1 + S1, 16U);
  1161. /* S0 = (xa - xc), S1 = (ya - yc) */
  1162. S0 = __SSAT(T0 - S0, 16U);
  1163. S1 = __SSAT(T1 - S1, 16U);
  1164. /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
  1165. /* input is down scale by 4 to avoid overflow */
  1166. /* Read xb (real), yb (imag) input */
  1167. T0 = pSrc16[i1 * 2U] >> 2U;
  1168. T1 = pSrc16[(i1 * 2U) + 1U] >> 2U;
  1169. /* Read xd (real), yd (imag) input */
  1170. /* input is down scale by 4 to avoid overflow */
  1171. U0 = pSrc16[i3 * 2U] >> 2U;
  1172. U1 = pSrc16[(i3 * 2U) + 1U] >> 2U;
  1173. /* T0 = (xb + xd), T1 = (yb + yd) */
  1174. T0 = __SSAT(T0 + U0, 16U);
  1175. T1 = __SSAT(T1 + U1, 16U);
  1176. /* writing the butterfly processed i0 sample; each half is shifted right by 1, so the stored sum is divided by 2 */
  1177. /* xa' = xa + xb + xc + xd */
  1178. /* ya' = ya + yb + yc + yd */
  1179. pSrc16[i0 * 2U] = (R0 >> 1U) + (T0 >> 1U);
  1180. pSrc16[(i0 * 2U) + 1U] = (R1 >> 1U) + (T1 >> 1U);
  1181. /* R0 = (xa + xc) - (xb + xd), R1 = (ya + yc) - (yb + yd) */
  1182. R0 = __SSAT(R0 - T0, 16U);
  1183. R1 = __SSAT(R1 - T1, 16U);
  1184. /* co2 & si2 are read from Coefficient pointer (coefficient pair 2*ic) */
  1185. Co2 = pCoef16[2U * ic * 2U];
  1186. Si2 = pCoef16[(2U * ic * 2U) + 1U];
  1187. /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
  1188. out1 = (q15_t) ((Co2 * R0 - Si2 * R1) >> 16U);
  1189. /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
  1190. out2 = (q15_t) ((Si2 * R0 + Co2 * R1) >> 16U);
  1191. /* Reading i0+fftLen/4 again: T0/T1 were overwritten with the (b + d) sums and slot i1 has not been written yet */
  1192. /* input is down scale by 4 to avoid overflow */
  1193. /* T0 = xb, T1 = yb */
  1194. T0 = pSrc16[i1 * 2U] >> 2U;
  1195. T1 = pSrc16[(i1 * 2U) + 1U] >> 2U;
  1196. /* writing the butterfly processed i0 + fftLen/4 sample */
  1197. /* writing output(xc', yc') to slot i1 */
  1198. pSrc16[i1 * 2U] = out1;
  1199. pSrc16[(i1 * 2U) + 1U] = out2;
  1200. /* Butterfly calculations */
  1201. /* input is down scale by 4 to avoid overflow */
  1202. /* U0 = xd, U1 = yd, re-read before slot i3 is overwritten */
  1203. U0 = pSrc16[i3 * 2U] >> 2U;
  1204. U1 = pSrc16[(i3 * 2U) + 1U] >> 2U;
  1205. /* T0 = xb-xd, T1 = yb-yd */
  1206. T0 = __SSAT(T0 - U0, 16U);
  1207. T1 = __SSAT(T1 - U1, 16U);
  1208. /* R0 = (xa-xc) + (yb-yd) , R1 = (ya-yc) - (xb-xd) */
  1209. R0 = (q15_t) __SSAT((q31_t) (S0 + T1), 16);
  1210. R1 = (q15_t) __SSAT((q31_t) (S1 - T0), 16);
  1211. /* S0 = (xa-xc) - (yb-yd), S1 = (ya-yc) + (xb-xd) */
  1212. S0 = (q15_t) __SSAT((q31_t) (S0 - T1), 16);
  1213. S1 = (q15_t) __SSAT((q31_t) (S1 + T0), 16);
  1214. /* co1 & si1 are read from Coefficient pointer (coefficient pair ic) */
  1215. Co1 = pCoef16[ic * 2U];
  1216. Si1 = pCoef16[(ic * 2U) + 1U];
  1217. /* Butterfly process for the i0+fftLen/2 sample */
  1218. /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */
  1219. out1 = (q15_t) ((Co1 * S0 - Si1 * S1) >> 16U);
  1220. /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */
  1221. out2 = (q15_t) ((Si1 * S0 + Co1 * S1) >> 16U);
  1222. /* writing output(xb', yb') to slot i2 */
  1223. pSrc16[i2 * 2U] = out1;
  1224. pSrc16[(i2 * 2U) + 1U] = out2;
  1225. /* Co3 & si3 are read from Coefficient pointer (coefficient pair 3*ic) */
  1226. Co3 = pCoef16[3U * ic * 2U];
  1227. Si3 = pCoef16[(3U * ic * 2U) + 1U];
  1228. /* Butterfly process for the i0+3fftLen/4 sample */
  1229. /* xd' = (xa+yb-xc-yd)* Co3 - (ya-xb-yc+xd)* (si3) */
  1230. out1 = (q15_t) ((Co3 * R0 - Si3 * R1) >> 16U);
  1231. /* yd' = (ya-xb-yc+xd)* Co3 + (xa+yb-xc-yd)* (si3) */
  1232. out2 = (q15_t) ((Si3 * R0 + Co3 * R1) >> 16U);
  1233. /* writing output(xd', yd') to slot i3 */
  1234. pSrc16[i3 * 2U] = out1;
  1235. pSrc16[(i3 * 2U) + 1U] = out2;
  1236. /* Twiddle coefficients index modifier */
  1237. ic = ic + twidCoefModifier;
  1238. /* Updating input index */
  1239. i0 = i0 + 1U;
  1240. } while (--j); /* NOTE(review): assumes fftLen >= 4; with n2 == 0 the unsigned --j would wrap instead of ending the loop */
  1241. /* End of first stage process */
  1242. /* data is in 4.11(q11) format */
  1243. /* Start of Middle stage process */
  1244. /* Twiddle coefficients index modifier */
  1245. twidCoefModifier <<= 2U;
  1246. /* Calculation of Middle stage: one pass of this loop per middle stage, k is only the stage counter */
  1247. for (k = fftLen / 4U; k > 4U; k >>= 2U)
  1248. {
  1249. /* Initializations for the middle stage: n1 = span of one butterfly group, n2 = n1/4 = distance between its inputs */
  1250. n1 = n2;
  1251. n2 >>= 2U;
  1252. ic = 0U;
  1253. for (j = 0U; j <= (n2 - 1U); j++)
  1254. {
  1255. /* the three twiddles depend only on j, so they are loaded once per j and reused for every group */
  1256. Co1 = pCoef16[ic * 2U];
  1257. Si1 = pCoef16[(ic * 2U) + 1U];
  1258. Co2 = pCoef16[2U * ic * 2U];
  1259. Si2 = pCoef16[2U * ic * 2U + 1U];
  1260. Co3 = pCoef16[3U * ic * 2U];
  1261. Si3 = pCoef16[(3U * ic * 2U) + 1U];
  1262. /* Twiddle coefficients index modifier */
  1263. ic = ic + twidCoefModifier;
  1264. /* Butterfly implementation: one butterfly per group of n1 samples */
  1265. for (i0 = j; i0 < fftLen; i0 += n1)
  1266. {
  1267. /* index calculation for the input as, */
  1268. /* pSrc16[i0 + 0], pSrc16[i0 + n2], pSrc16[i0 + 2*n2], pSrc16[i0 + 3*n2] */
  1269. i1 = i0 + n2;
  1270. i2 = i1 + n2;
  1271. i3 = i2 + n2;
  1272. /* Reading i0, i0 + 2*n2 inputs */
  1273. /* Read xa (real), ya (imag) input */
  1274. T0 = pSrc16[i0 * 2U];
  1275. T1 = pSrc16[(i0 * 2U) + 1U];
  1276. /* Read xc (real), yc (imag) input */
  1277. S0 = pSrc16[i2 * 2U];
  1278. S1 = pSrc16[(i2 * 2U) + 1U];
  1279. /* R0 = (xa + xc), R1 = (ya + yc) */
  1280. R0 = __SSAT(T0 + S0, 16U);
  1281. R1 = __SSAT(T1 + S1, 16U);
  1282. /* S0 = (xa - xc), S1 = (ya - yc) */
  1283. S0 = __SSAT(T0 - S0, 16U);
  1284. S1 = __SSAT(T1 - S1, 16U);
  1285. /* Reading i0 + n2 , i0 + 3*n2 inputs */
  1286. /* Read xb (real), yb (imag) input */
  1287. T0 = pSrc16[i1 * 2U];
  1288. T1 = pSrc16[(i1 * 2U) + 1U];
  1289. /* Read xd (real), yd (imag) input */
  1290. U0 = pSrc16[i3 * 2U];
  1291. U1 = pSrc16[(i3 * 2U) + 1U];
  1292. /* T0 = (xb + xd), T1 = (yb + yd) */
  1293. T0 = __SSAT(T0 + U0, 16U);
  1294. T1 = __SSAT(T1 + U1, 16U);
  1295. /* writing the butterfly processed i0 sample; the shifts divide the stored sum by 4 */
  1296. /* xa' = xa + xb + xc + xd */
  1297. /* ya' = ya + yb + yc + yd */
  1298. pSrc16[i0 * 2U] = ((R0 >> 1U) + (T0 >> 1U)) >> 1U;
  1299. pSrc16[(i0 * 2U) + 1U] = ((R1 >> 1U) + (T1 >> 1U)) >> 1U;
  1300. /* R0 = (xa + xc) - (xb + xd), R1 = (ya + yc) - (yb + yd), each operand halved first */
  1301. R0 = (R0 >> 1U) - (T0 >> 1U);
  1302. R1 = (R1 >> 1U) - (T1 >> 1U);
  1303. /* (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
  1304. out1 = (q15_t) ((Co2 * R0 - Si2 * R1) >> 16);
  1305. /* (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
  1306. out2 = (q15_t) ((Si2 * R0 + Co2 * R1) >> 16);
  1307. /* Reading i0 + n2 again: T0/T1 were overwritten with the (b + d) sums and slot i1 has not been written yet */
  1308. /* Read xb (real), yb (imag) input */
  1309. T0 = pSrc16[i1 * 2U];
  1310. T1 = pSrc16[(i1 * 2U) + 1U];
  1311. /* writing the butterfly processed i0 + n2 sample */
  1312. /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
  1313. /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
  1314. pSrc16[i1 * 2U] = out1;
  1315. pSrc16[(i1 * 2U) + 1U] = out2;
  1316. /* Butterfly calculations */
  1317. /* Read xd (real), yd (imag) input */
  1318. U0 = pSrc16[i3 * 2U];
  1319. U1 = pSrc16[(i3 * 2U) + 1U];
  1320. /* T0 = xb-xd, T1 = yb-yd */
  1321. T0 = __SSAT(T0 - U0, 16U);
  1322. T1 = __SSAT(T1 - U1, 16U);
  1323. /* R0 = (xa-xc) + (yb-yd) , R1 = (ya-yc) - (xb-xd), each operand halved first */
  1324. R0 = (S0 >> 1U) + (T1 >> 1U);
  1325. R1 = (S1 >> 1U) - (T0 >> 1U);
  1326. /* S0 = (xa-xc) - (yb-yd), S1 = (ya-yc) + (xb-xd), each operand halved first */
  1327. S0 = (S0 >> 1U) - (T1 >> 1U);
  1328. S1 = (S1 >> 1U) + (T0 >> 1U);
  1329. /* Butterfly process for the i0 + 2*n2 sample */
  1330. out1 = (q15_t) ((Co1 * S0 - Si1 * S1) >> 16U);
  1331. out2 = (q15_t) ((Si1 * S0 + Co1 * S1) >> 16U);
  1332. /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */
  1333. /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */
  1334. pSrc16[i2 * 2U] = out1;
  1335. pSrc16[(i2 * 2U) + 1U] = out2;
  1336. /* Butterfly process for the i0 + 3*n2 sample */
  1337. out1 = (q15_t) ((Co3 * R0 - Si3 * R1) >> 16U);
  1338. out2 = (q15_t) ((Si3 * R0 + Co3 * R1) >> 16U);
  1339. /* xd' = (xa+yb-xc-yd)* Co3 - (ya-xb-yc+xd)* (si3) */
  1340. /* yd' = (ya-xb-yc+xd)* Co3 + (xa+yb-xc-yd)* (si3) */
  1341. pSrc16[i3 * 2U] = out1;
  1342. pSrc16[(i3 * 2U) + 1U] = out2;
  1343. }
  1344. }
  1345. /* Twiddle coefficients index modifier */
  1346. twidCoefModifier <<= 2U;
  1347. }
  1348. /* End of Middle stages process */
  1349. /* data is in 10.6(q6) format for the 1024 point */
  1350. /* data is in 8.8(q8) format for the 256 point */
  1351. /* data is in 6.10(q10) format for the 64 point */
  1352. /* data is in 4.12(q12) format for the 16 point */
  1353. /* start of last stage process */
  1354. /* Initializations for the last stage: no twiddle multiplications are applied in this stage */
  1355. n1 = n2;
  1356. n2 >>= 2U;
  1357. /* Butterfly implementation */
  1358. for (i0 = 0U; i0 <= (fftLen - n1); i0 += n1)
  1359. {
  1360. /* index calculation for the input as, */
  1361. /* pSrc16[i0 + 0], pSrc16[i0 + n2], pSrc16[i0 + 2*n2], pSrc16[i0 + 3*n2] */
  1362. i1 = i0 + n2;
  1363. i2 = i1 + n2;
  1364. i3 = i2 + n2;
  1365. /* Reading i0, i0 + 2*n2 inputs */
  1366. /* Read xa (real), ya (imag) input */
  1367. T0 = pSrc16[i0 * 2U];
  1368. T1 = pSrc16[(i0 * 2U) + 1U];
  1369. /* Read xc (real), yc (imag) input */
  1370. S0 = pSrc16[i2 * 2U];
  1371. S1 = pSrc16[(i2 * 2U) + 1U];
  1372. /* R0 = (xa + xc), R1 = (ya + yc) */
  1373. R0 = __SSAT(T0 + S0, 16U);
  1374. R1 = __SSAT(T1 + S1, 16U);
  1375. /* S0 = (xa - xc), S1 = (ya - yc) */
  1376. S0 = __SSAT(T0 - S0, 16U);
  1377. S1 = __SSAT(T1 - S1, 16U);
  1378. /* Reading i0 + n2 , i0 + 3*n2 inputs */
  1379. /* Read xb (real), yb (imag) input */
  1380. T0 = pSrc16[i1 * 2U];
  1381. T1 = pSrc16[(i1 * 2U) + 1U];
  1382. /* Read xd (real), yd (imag) input */
  1383. U0 = pSrc16[i3 * 2U];
  1384. U1 = pSrc16[(i3 * 2U) + 1U];
  1385. /* T0 = (xb + xd), T1 = (yb + yd) */
  1386. T0 = __SSAT(T0 + U0, 16U);
  1387. T1 = __SSAT(T1 + U1, 16U);
  1388. /* writing the butterfly processed i0 sample; each half is shifted right by 1, so the stored sum is divided by 2 */
  1389. /* xa' = xa + xb + xc + xd */
  1390. /* ya' = ya + yb + yc + yd */
  1391. pSrc16[i0 * 2U] = (R0 >> 1U) + (T0 >> 1U);
  1392. pSrc16[(i0 * 2U) + 1U] = (R1 >> 1U) + (T1 >> 1U);
  1393. /* R0 = (xa + xc) - (xb + xd), R1 = (ya + yc) - (yb + yd), each operand halved first */
  1394. R0 = (R0 >> 1U) - (T0 >> 1U);
  1395. R1 = (R1 >> 1U) - (T1 >> 1U);
  1396. /* Read xb (real), yb (imag) input again before slot i1 is overwritten */
  1397. T0 = pSrc16[i1 * 2U];
  1398. T1 = pSrc16[(i1 * 2U) + 1U];
  1399. /* writing the butterfly processed i0 + n2 sample */
  1400. /* xc' = (xa-xb+xc-xd) */
  1401. /* yc' = (ya-yb+yc-yd) */
  1402. pSrc16[i1 * 2U] = R0;
  1403. pSrc16[(i1 * 2U) + 1U] = R1;
  1404. /* Read xd (real), yd (imag) input */
  1405. U0 = pSrc16[i3 * 2U];
  1406. U1 = pSrc16[(i3 * 2U) + 1U];
  1407. /* T0 = (xb - xd), T1 = (yb - yd) */
  1408. T0 = __SSAT(T0 - U0, 16U);
  1409. T1 = __SSAT(T1 - U1, 16U);
  1410. /* writing the butterfly processed i0 + 2*n2 sample */
  1411. /* xb' = (xa-yb-xc+yd) */
  1412. /* yb' = (ya+xb-yc-xd) */
  1413. pSrc16[i2 * 2U] = (S0 >> 1U) - (T1 >> 1U);
  1414. pSrc16[(i2 * 2U) + 1U] = (S1 >> 1U) + (T0 >> 1U);
  1415. /* writing the butterfly processed i0 + 3*n2 sample */
  1416. /* xd' = (xa+yb-xc-yd) */
  1417. /* yd' = (ya-xb-yc+xd) */
  1418. pSrc16[i3 * 2U] = (S0 >> 1U) + (T1 >> 1U);
  1419. pSrc16[(i3 * 2U) + 1U] = (S1 >> 1U) - (T0 >> 1U);
  1420. }
  1421. /* end of last stage process */
  1422. /* output is in 11.5(q5) format for the 1024 point */
  1423. /* output is in 9.7(q7) format for the 256 point */
  1424. /* output is in 7.9(q9) format for the 64 point */
  1425. /* output is in 5.11(q11) format for the 16 point */
  1426. #endif /* #if defined (ARM_MATH_DSP) */
  1427. }