arm_cfft_radix4_f16.c 40 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272
  1. /* ----------------------------------------------------------------------
  2. * Project: CMSIS DSP Library
  3. * Title: arm_cfft_radix4_f16.c
  4. * Description: Radix-4 Decimation in Frequency CFFT & CIFFT Floating point processing function
  5. *
  6. * $Date: 23 April 2021
  7. * $Revision: V1.9.0
  8. *
  9. * Target Processor: Cortex-M and Cortex-A cores
  10. * -------------------------------------------------------------------- */
  11. /*
  12. * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  13. *
  14. * SPDX-License-Identifier: Apache-2.0
  15. *
  16. * Licensed under the Apache License, Version 2.0 (the License); you may
  17. * not use this file except in compliance with the License.
  18. * You may obtain a copy of the License at
  19. *
  20. * www.apache.org/licenses/LICENSE-2.0
  21. *
  22. * Unless required by applicable law or agreed to in writing, software
  23. * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  24. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  25. * See the License for the specific language governing permissions and
  26. * limitations under the License.
  27. */
  28. #include "dsp/transform_functions_f16.h"
  29. #if defined(ARM_FLOAT16_SUPPORTED)
  30. extern void arm_bitreversal_f16(
  31. float16_t * pSrc,
  32. uint16_t fftSize,
  33. uint16_t bitRevFactor,
  34. const uint16_t * pBitRevTab);
  35. void arm_radix4_butterfly_f16(
  36. float16_t * pSrc,
  37. uint16_t fftLen,
  38. const float16_t * pCoef,
  39. uint16_t twidCoefModifier);
  40. void arm_radix4_butterfly_inverse_f16(
  41. float16_t * pSrc,
  42. uint16_t fftLen,
  43. const float16_t * pCoef,
  44. uint16_t twidCoefModifier,
  45. float16_t onebyfftLen);
  46. void arm_cfft_radix4by2_f16(
  47. float16_t * pSrc,
  48. uint32_t fftLen,
  49. const float16_t * pCoef);
  50. /**
  51. @ingroup groupTransforms
  52. */
  53. /**
  54. @addtogroup ComplexFFT
  55. @{
  56. */
  57. /*
  58. * @brief Core function for the floating-point CFFT butterfly process.
  59. * @param[in, out] *pSrc points to the in-place buffer of floating-point data type.
  60. * @param[in] fftLen length of the FFT.
  61. * @param[in] *pCoef points to the twiddle coefficient buffer.
  62. * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
  63. * @return none.
  64. */
  65. void arm_cfft_radix4by2_f16(
  66. float16_t * pSrc,
  67. uint32_t fftLen,
  68. const float16_t * pCoef)
  69. {
  70. uint32_t i, l;
  71. uint32_t n2, ia;
  72. float16_t xt, yt, cosVal, sinVal;
  73. float16_t p0, p1,p2,p3,a0,a1;
  74. n2 = fftLen >> 1;
  75. ia = 0;
  76. for (i = 0; i < n2; i++)
  77. {
  78. cosVal = pCoef[2*ia];
  79. sinVal = pCoef[2*ia + 1];
  80. ia++;
  81. l = i + n2;
  82. /* Butterfly implementation */
  83. a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l];
  84. xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l];
  85. yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1];
  86. a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1];
  87. p0 = (_Float16)xt * (_Float16)cosVal;
  88. p1 = (_Float16)yt * (_Float16)sinVal;
  89. p2 = (_Float16)yt * (_Float16)cosVal;
  90. p3 = (_Float16)xt * (_Float16)sinVal;
  91. pSrc[2 * i] = a0;
  92. pSrc[2 * i + 1] = a1;
  93. pSrc[2 * l] = (_Float16)p0 + (_Float16)p1;
  94. pSrc[2 * l + 1] = (_Float16)p2 - (_Float16)p3;
  95. }
  96. // first col
  97. arm_radix4_butterfly_f16( pSrc, n2, (float16_t*)pCoef, 2U);
  98. // second col
  99. arm_radix4_butterfly_f16( pSrc + fftLen, n2, (float16_t*)pCoef, 2U);
  100. }
  101. /**
  102. @brief Processing function for the floating-point Radix-4 CFFT/CIFFT.
  103. @deprecated Do not use this function. It has been superseded by \ref arm_cfft_f16 and will be removed in the future.
  104. @param[in] S points to an instance of the floating-point Radix-4 CFFT/CIFFT structure
  105. @param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
  106. @return none
  107. */
  108. void arm_cfft_radix4_f16(
  109. const arm_cfft_radix4_instance_f16 * S,
  110. float16_t * pSrc)
  111. {
  112. if (S->ifftFlag == 1U)
  113. {
  114. /* Complex IFFT radix-4 */
  115. arm_radix4_butterfly_inverse_f16(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier, S->onebyfftLen);
  116. }
  117. else
  118. {
  119. /* Complex FFT radix-4 */
  120. arm_radix4_butterfly_f16(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
  121. }
  122. if (S->bitReverseFlag == 1U)
  123. {
  124. /* Bit Reversal */
  125. arm_bitreversal_f16(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
  126. }
  127. }
  128. /**
  129. @} end of ComplexFFT group
  130. */
  131. /* ----------------------------------------------------------------------
  132. * Internal helper function used by the FFTs
  133. * ---------------------------------------------------------------------- */
  134. /*
  135. * @brief Core function for the floating-point (f16) forward CFFT radix-4 butterfly process; one of two code paths is compiled depending on ARM_MATH_DSP.
  136. * @param[in, out] *pSrc points to the in-place buffer of interleaved (real, imaginary) floating-point values.
  137. * @param[in] fftLen length of the FFT. NOTE(review): the stage loops appear to assume a power of 4 (and at least 16 in the ARM_MATH_DSP path) - confirm against callers.
  138. * @param[in] *pCoef points to the twiddle coefficient buffer, read as interleaved pairs (co at even index, si at odd index).
  139. * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table; it is multiplied by 4 after every stage.
  140. * @return none.
  141. */
  142. void arm_radix4_butterfly_f16(
  143. float16_t * pSrc,
  144. uint16_t fftLen,
  145. const float16_t * pCoef,
  146. uint16_t twidCoefModifier)
  147. {
  148. float16_t co1, co2, co3, si1, si2, si3;
  149. uint32_t ia1, ia2, ia3;
  150. uint32_t i0, i1, i2, i3;
  151. uint32_t n1, n2, j, k;
  152. #if defined (ARM_MATH_DSP)
  153. /* Variant compiled when ARM_MATH_DSP is defined: separate first stage, middle stages and twiddle-free last stage */
  154. float16_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
  155. float16_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc,
  156. Ybminusd;
  157. float16_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
  158. float16_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
  159. float16_t *ptr1;
  160. float16_t p0,p1,p2,p3,p4,p5;
  161. float16_t a0,a1,a2,a3,a4,a5,a6,a7;
  162. /* Initializations for the first stage */
  163. n2 = fftLen;
  164. n1 = n2;
  165. /* n2 = fftLen/4 */
  166. n2 >>= 2U;
  167. i0 = 0U;
  168. ia1 = 0U;
  169. j = n2;
  170. /* Calculation of first stage */
  171. do
  172. {
  173. /* index calculation for the input as, */
  174. /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
  175. i1 = i0 + n2;
  176. i2 = i1 + n2;
  177. i3 = i2 + n2;
  178. xaIn = pSrc[(2U * i0)];
  179. yaIn = pSrc[(2U * i0) + 1U];
  180. xbIn = pSrc[(2U * i1)];
  181. ybIn = pSrc[(2U * i1) + 1U];
  182. xcIn = pSrc[(2U * i2)];
  183. ycIn = pSrc[(2U * i2) + 1U];
  184. xdIn = pSrc[(2U * i3)];
  185. ydIn = pSrc[(2U * i3) + 1U];
  186. /* xa + xc */
  187. Xaplusc = (_Float16)xaIn + (_Float16)xcIn;
  188. /* xb + xd */
  189. Xbplusd = (_Float16)xbIn + (_Float16)xdIn;
  190. /* ya + yc */
  191. Yaplusc = (_Float16)yaIn + (_Float16)ycIn;
  192. /* yb + yd */
  193. Ybplusd = (_Float16)ybIn + (_Float16)ydIn;
  194. /* index calculation for the coefficients */
  195. ia2 = ia1 + ia1;
  196. co2 = pCoef[ia2 * 2U];
  197. si2 = pCoef[(ia2 * 2U) + 1U];
  198. /* xa - xc */
  199. Xaminusc = (_Float16)xaIn - (_Float16)xcIn;
  200. /* xb - xd */
  201. Xbminusd = (_Float16)xbIn - (_Float16)xdIn;
  202. /* ya - yc */
  203. Yaminusc = (_Float16)yaIn - (_Float16)ycIn;
  204. /* yb - yd */
  205. Ybminusd = (_Float16)ybIn - (_Float16)ydIn;
  206. /* xa' = xa + xb + xc + xd */
  207. pSrc[(2U * i0)] = (_Float16)Xaplusc + (_Float16)Xbplusd;
  208. /* ya' = ya + yb + yc + yd */
  209. pSrc[(2U * i0) + 1U] = (_Float16)Yaplusc + (_Float16)Ybplusd;
  210. /* (xa - xc) + (yb - yd) */
  211. Xb12C_out = ((_Float16)Xaminusc + (_Float16)Ybminusd);
  212. /* (ya - yc) - (xb - xd) */
  213. Yb12C_out = ((_Float16)Yaminusc - (_Float16)Xbminusd);
  214. /* (xa + xc) - (xb + xd) */
  215. Xc12C_out = ((_Float16)Xaplusc - (_Float16)Xbplusd);
  216. /* (ya + yc) - (yb + yd) */
  217. Yc12C_out = ((_Float16)Yaplusc - (_Float16)Ybplusd);
  218. /* (xa - xc) - (yb - yd) */
  219. Xd12C_out = ((_Float16)Xaminusc - (_Float16)Ybminusd);
  220. /* (ya - yc) + (xb - xd) */
  221. Yd12C_out = ((_Float16)Xbminusd + (_Float16)Yaminusc);
  222. co1 = pCoef[ia1 * 2U];
  223. si1 = pCoef[(ia1 * 2U) + 1U];
  224. /* index calculation for the coefficients */
  225. ia3 = ia2 + ia1;
  226. co3 = pCoef[ia3 * 2U];
  227. si3 = pCoef[(ia3 * 2U) + 1U];
  228. Xb12_out = (_Float16)Xb12C_out * (_Float16)co1;
  229. Yb12_out = (_Float16)Yb12C_out * (_Float16)co1;
  230. Xc12_out = (_Float16)Xc12C_out * (_Float16)co2;
  231. Yc12_out = (_Float16)Yc12C_out * (_Float16)co2;
  232. Xd12_out = (_Float16)Xd12C_out * (_Float16)co3;
  233. Yd12_out = (_Float16)Yd12C_out * (_Float16)co3;
  234. /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
  235. // p0 is the si1 term that is added to Xb12_out below
  236. p0 = (_Float16)Yb12C_out * (_Float16)si1;
  237. /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
  238. // p1 is the si1 term that is subtracted from Yb12_out below
  239. p1 = (_Float16)Xb12C_out * (_Float16)si1;
  240. /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
  241. // p2 is the si2 term that is added to Xc12_out below
  242. p2 = (_Float16)Yc12C_out * (_Float16)si2;
  243. /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
  244. // p3 is the si2 term that is subtracted from Yc12_out below
  245. p3 = (_Float16)Xc12C_out * (_Float16)si2;
  246. /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
  247. // p4 is the si3 term that is added to Xd12_out below
  248. p4 = (_Float16)Yd12C_out * (_Float16)si3;
  249. /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
  250. // p5 is the si3 term that is subtracted from Yd12_out below
  251. p5 = (_Float16)Xd12C_out * (_Float16)si3;
  252. Xb12_out += (_Float16)p0;
  253. Yb12_out -= (_Float16)p1;
  254. Xc12_out += (_Float16)p2;
  255. Yc12_out -= (_Float16)p3;
  256. Xd12_out += (_Float16)p4;
  257. Yd12_out -= (_Float16)p5;
  258. /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2); note the c result is stored at index i1 */
  259. pSrc[2U * i1] = Xc12_out;
  260. /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
  261. pSrc[(2U * i1) + 1U] = Yc12_out;
  262. /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1); note the b result is stored at index i2 */
  263. pSrc[2U * i2] = Xb12_out;
  264. /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
  265. pSrc[(2U * i2) + 1U] = Yb12_out;
  266. /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
  267. pSrc[2U * i3] = Xd12_out;
  268. /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
  269. pSrc[(2U * i3) + 1U] = Yd12_out;
  270. /* Twiddle coefficients index modifier */
  271. ia1 += twidCoefModifier;
  272. /* Updating input index */
  273. i0++;
  274. }
  275. while (--j);
  276. twidCoefModifier <<= 2U;
  277. /* Calculation of the second stage up to, but excluding, the last stage */
  278. for (k = fftLen >> 2U; k > 4U; k >>= 2U)
  279. {
  280. /* Initializations for this stage: n1 is the group stride, n2 the distance between butterfly inputs */
  281. n1 = n2;
  282. n2 >>= 2U;
  283. ia1 = 0U;
  284. /* Calculation of this stage: j selects the twiddle set, the inner loop walks all groups */
  285. j = 0;
  286. do
  287. {
  288. /* index calculation for the coefficients */
  289. ia2 = ia1 + ia1;
  290. ia3 = ia2 + ia1;
  291. co1 = pCoef[ia1 * 2U];
  292. si1 = pCoef[(ia1 * 2U) + 1U];
  293. co2 = pCoef[ia2 * 2U];
  294. si2 = pCoef[(ia2 * 2U) + 1U];
  295. co3 = pCoef[ia3 * 2U];
  296. si3 = pCoef[(ia3 * 2U) + 1U];
  297. /* Twiddle coefficients index modifier */
  298. ia1 += twidCoefModifier;
  299. i0 = j;
  300. do
  301. {
  302. /* index calculation for the input as, */
  303. /* pSrc[i0], pSrc[i0 + n2], pSrc[i0 + 2*n2], pSrc[i0 + 3*n2] */
  304. i1 = i0 + n2;
  305. i2 = i1 + n2;
  306. i3 = i2 + n2;
  307. xaIn = pSrc[(2U * i0)];
  308. yaIn = pSrc[(2U * i0) + 1U];
  309. xbIn = pSrc[(2U * i1)];
  310. ybIn = pSrc[(2U * i1) + 1U];
  311. xcIn = pSrc[(2U * i2)];
  312. ycIn = pSrc[(2U * i2) + 1U];
  313. xdIn = pSrc[(2U * i3)];
  314. ydIn = pSrc[(2U * i3) + 1U];
  315. /* xa - xc */
  316. Xaminusc = (_Float16)xaIn - (_Float16)xcIn;
  317. /* (xb - xd) */
  318. Xbminusd = (_Float16)xbIn - (_Float16)xdIn;
  319. /* ya - yc */
  320. Yaminusc = (_Float16)yaIn - (_Float16)ycIn;
  321. /* (yb - yd) */
  322. Ybminusd = (_Float16)ybIn - (_Float16)ydIn;
  323. /* xa + xc */
  324. Xaplusc = (_Float16)xaIn + (_Float16)xcIn;
  325. /* xb + xd */
  326. Xbplusd = (_Float16)xbIn + (_Float16)xdIn;
  327. /* ya + yc */
  328. Yaplusc = (_Float16)yaIn + (_Float16)ycIn;
  329. /* yb + yd */
  330. Ybplusd = (_Float16)ybIn + (_Float16)ydIn;
  331. /* (xa - xc) + (yb - yd) */
  332. Xb12C_out = ((_Float16)Xaminusc + (_Float16)Ybminusd);
  333. /* (ya - yc) - (xb - xd) */
  334. Yb12C_out = ((_Float16)Yaminusc - (_Float16)Xbminusd);
  335. /* xa + xc -(xb + xd) */
  336. Xc12C_out = ((_Float16)Xaplusc - (_Float16)Xbplusd);
  337. /* (ya + yc) - (yb + yd) */
  338. Yc12C_out = ((_Float16)Yaplusc - (_Float16)Ybplusd);
  339. /* (xa - xc) - (yb - yd) */
  340. Xd12C_out = ((_Float16)Xaminusc - (_Float16)Ybminusd);
  341. /* (ya - yc) + (xb - xd) */
  342. Yd12C_out = ((_Float16)Xbminusd + (_Float16)Yaminusc);
  343. pSrc[(2U * i0)] = (_Float16)Xaplusc + (_Float16)Xbplusd;
  344. pSrc[(2U * i0) + 1U] = (_Float16)Yaplusc + (_Float16)Ybplusd;
  345. Xb12_out = (_Float16)Xb12C_out * (_Float16)co1;
  346. Yb12_out = (_Float16)Yb12C_out * (_Float16)co1;
  347. Xc12_out = (_Float16)Xc12C_out * (_Float16)co2;
  348. Yc12_out = (_Float16)Yc12C_out * (_Float16)co2;
  349. Xd12_out = (_Float16)Xd12C_out * (_Float16)co3;
  350. Yd12_out = (_Float16)Yd12C_out * (_Float16)co3;
  351. /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
  352. // p0 is the si1 term that is added to Xb12_out below
  353. p0 = (_Float16)Yb12C_out * (_Float16)si1;
  354. /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
  355. // p1 is the si1 term that is subtracted from Yb12_out below
  356. p1 = (_Float16)Xb12C_out * (_Float16)si1;
  357. /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
  358. // p2 is the si2 term that is added to Xc12_out below
  359. p2 = (_Float16)Yc12C_out * (_Float16)si2;
  360. /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
  361. // p3 is the si2 term that is subtracted from Yc12_out below
  362. p3 = (_Float16)Xc12C_out * (_Float16)si2;
  363. /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
  364. // p4 is the si3 term that is added to Xd12_out below
  365. p4 = (_Float16)Yd12C_out * (_Float16)si3;
  366. /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
  367. // p5 is the si3 term that is subtracted from Yd12_out below
  368. p5 = (_Float16)Xd12C_out * (_Float16)si3;
  369. Xb12_out += (_Float16)p0;
  370. Yb12_out -= (_Float16)p1;
  371. Xc12_out += (_Float16)p2;
  372. Yc12_out -= (_Float16)p3;
  373. Xd12_out += (_Float16)p4;
  374. Yd12_out -= (_Float16)p5;
  375. /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2); note the c result is stored at index i1 */
  376. pSrc[2U * i1] = Xc12_out;
  377. /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
  378. pSrc[(2U * i1) + 1U] = Yc12_out;
  379. /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1); note the b result is stored at index i2 */
  380. pSrc[2U * i2] = Xb12_out;
  381. /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
  382. pSrc[(2U * i2) + 1U] = Yb12_out;
  383. /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
  384. pSrc[2U * i3] = Xd12_out;
  385. /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
  386. pSrc[(2U * i3) + 1U] = Yd12_out;
  387. i0 += n1;
  388. } while (i0 < fftLen);
  389. j++;
  390. } while (j <= (n2 - 1U));
  391. twidCoefModifier <<= 2U;
  392. }
  393. j = fftLen >> 2;
  394. ptr1 = &pSrc[0];
  395. /* Calculations of last stage: no twiddle multiplications, each pass handles 8 consecutive values (4 complex samples) */
  396. do
  397. {
  398. xaIn = ptr1[0];
  399. yaIn = ptr1[1];
  400. xbIn = ptr1[2];
  401. ybIn = ptr1[3];
  402. xcIn = ptr1[4];
  403. ycIn = ptr1[5];
  404. xdIn = ptr1[6];
  405. ydIn = ptr1[7];
  406. /* xa + xc */
  407. Xaplusc = (_Float16)xaIn + (_Float16)xcIn;
  408. /* xa - xc */
  409. Xaminusc = (_Float16)xaIn - (_Float16)xcIn;
  410. /* ya + yc */
  411. Yaplusc = (_Float16)yaIn + (_Float16)ycIn;
  412. /* ya - yc */
  413. Yaminusc = (_Float16)yaIn - (_Float16)ycIn;
  414. /* xb + xd */
  415. Xbplusd = (_Float16)xbIn + (_Float16)xdIn;
  416. /* yb + yd */
  417. Ybplusd = (_Float16)ybIn + (_Float16)ydIn;
  418. /* (xb-xd) */
  419. Xbminusd = (_Float16)xbIn - (_Float16)xdIn;
  420. /* (yb-yd) */
  421. Ybminusd = (_Float16)ybIn - (_Float16)ydIn;
  422. /* xa' = xa + xb + xc + xd */
  423. a0 = ((_Float16)Xaplusc + (_Float16)Xbplusd);
  424. /* ya' = ya + yb + yc + yd */
  425. a1 = ((_Float16)Yaplusc + (_Float16)Ybplusd);
  426. /* xc' = (xa-xb+xc-xd) */
  427. a2 = ((_Float16)Xaplusc - (_Float16)Xbplusd);
  428. /* yc' = (ya-yb+yc-yd) */
  429. a3 = ((_Float16)Yaplusc - (_Float16)Ybplusd);
  430. /* xb' = (xa+yb-xc-yd) */
  431. a4 = ((_Float16)Xaminusc + (_Float16)Ybminusd);
  432. /* yb' = (ya-xb-yc+xd) */
  433. a5 = ((_Float16)Yaminusc - (_Float16)Xbminusd);
  434. /* xd' = (xa-yb-xc+yd) */
  435. a6 = ((_Float16)Xaminusc - (_Float16)Ybminusd);
  436. /* yd' = (ya+xb-yc-xd) */
  437. a7 = ((_Float16)Xbminusd + (_Float16)Yaminusc);
  438. ptr1[0] = a0;
  439. ptr1[1] = a1;
  440. ptr1[2] = a2;
  441. ptr1[3] = a3;
  442. ptr1[4] = a4;
  443. ptr1[5] = a5;
  444. ptr1[6] = a6;
  445. ptr1[7] = a7;
  446. /* increment pointer by 8 */
  447. ptr1 += 8U;
  448. } while (--j);
  449. #else
  450. float16_t t1, t2, r1, r2, s1, s2;
  451. /* Variant compiled when ARM_MATH_DSP is not defined: one generic loop handles every stage */
  452. /* Initializations for the fft calculation */
  453. n2 = fftLen;
  454. n1 = n2;
  455. for (k = fftLen; k > 1U; k >>= 2U)
  456. {
  457. /* Initializations for this stage: n1 is the group stride, n2 the distance between butterfly inputs */
  458. n1 = n2;
  459. n2 >>= 2U;
  460. ia1 = 0U;
  461. /* FFT Calculation */
  462. j = 0;
  463. do
  464. {
  465. /* index calculation for the coefficients */
  466. ia2 = ia1 + ia1;
  467. ia3 = ia2 + ia1;
  468. co1 = pCoef[ia1 * 2U];
  469. si1 = pCoef[(ia1 * 2U) + 1U];
  470. co2 = pCoef[ia2 * 2U];
  471. si2 = pCoef[(ia2 * 2U) + 1U];
  472. co3 = pCoef[ia3 * 2U];
  473. si3 = pCoef[(ia3 * 2U) + 1U];
  474. /* Twiddle coefficients index modifier */
  475. ia1 = ia1 + twidCoefModifier;
  476. i0 = j;
  477. do
  478. {
  479. /* index calculation for the input as, */
  480. /* pSrc[i0], pSrc[i0 + n2], pSrc[i0 + 2*n2], pSrc[i0 + 3*n2] */
  481. i1 = i0 + n2;
  482. i2 = i1 + n2;
  483. i3 = i2 + n2;
  484. /* xa + xc */
  485. r1 = (_Float16)pSrc[(2U * i0)] + (_Float16)pSrc[(2U * i2)];
  486. /* xa - xc */
  487. r2 = (_Float16)pSrc[(2U * i0)] - (_Float16)pSrc[(2U * i2)];
  488. /* ya + yc */
  489. s1 = (_Float16)pSrc[(2U * i0) + 1U] + (_Float16)pSrc[(2U * i2) + 1U];
  490. /* ya - yc */
  491. s2 = (_Float16)pSrc[(2U * i0) + 1U] - (_Float16)pSrc[(2U * i2) + 1U];
  492. /* xb + xd */
  493. t1 = (_Float16)pSrc[2U * i1] + (_Float16)pSrc[2U * i3];
  494. /* xa' = xa + xb + xc + xd */
  495. pSrc[2U * i0] = (_Float16)r1 + (_Float16)t1;
  496. /* xa + xc -(xb + xd) */
  497. r1 = (_Float16)r1 - (_Float16)t1;
  498. /* yb + yd */
  499. t2 = (_Float16)pSrc[(2U * i1) + 1U] + (_Float16)pSrc[(2U * i3) + 1U];
  500. /* ya' = ya + yb + yc + yd */
  501. pSrc[(2U * i0) + 1U] = (_Float16)s1 + (_Float16)t2;
  502. /* (ya + yc) - (yb + yd) */
  503. s1 = (_Float16)s1 - (_Float16)t2;
  504. /* (yb - yd); must be read before pSrc[i1] is overwritten below */
  505. t1 = (_Float16)pSrc[(2U * i1) + 1U] - (_Float16)pSrc[(2U * i3) + 1U];
  506. /* (xb - xd); must be read before pSrc[i1] is overwritten below */
  507. t2 = (_Float16)pSrc[2U * i1] - (_Float16)pSrc[2U * i3];
  508. /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2); note the c result is stored at index i1 */
  509. pSrc[2U * i1] = ((_Float16)r1 * (_Float16)co2) + ((_Float16)s1 * (_Float16)si2);
  510. /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
  511. pSrc[(2U * i1) + 1U] = ((_Float16)s1 * (_Float16)co2) - ((_Float16)r1 * (_Float16)si2);
  512. /* (xa - xc) + (yb - yd) */
  513. r1 = (_Float16)r2 + (_Float16)t1;
  514. /* (xa - xc) - (yb - yd) */
  515. r2 = (_Float16)r2 - (_Float16)t1;
  516. /* (ya - yc) - (xb - xd) */
  517. s1 = (_Float16)s2 - (_Float16)t2;
  518. /* (ya - yc) + (xb - xd) */
  519. s2 = (_Float16)s2 + (_Float16)t2;
  520. /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1); note the b result is stored at index i2 */
  521. pSrc[2U * i2] = ((_Float16)r1 * (_Float16)co1) + ((_Float16)s1 * (_Float16)si1);
  522. /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
  523. pSrc[(2U * i2) + 1U] = ((_Float16)s1 * (_Float16)co1) - ((_Float16)r1 * (_Float16)si1);
  524. /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
  525. pSrc[2U * i3] = ((_Float16)r2 * (_Float16)co3) + ((_Float16)s2 * (_Float16)si3);
  526. /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
  527. pSrc[(2U * i3) + 1U] = ((_Float16)s2 * (_Float16)co3) - ((_Float16)r2 * (_Float16)si3);
  528. i0 += n1;
  529. } while ( i0 < fftLen);
  530. j++;
  531. } while (j <= (n2 - 1U));
  532. twidCoefModifier <<= 2U;
  533. }
  534. #endif /* #if defined (ARM_MATH_DSP) */
  535. }
  536. /*
  537. * @brief Core function for the floating-point CIFFT butterfly process.
  538. * @param[in, out] *pSrc points to the in-place buffer of floating-point data type.
  539. * @param[in] fftLen length of the FFT.
  540. * @param[in] *pCoef points to twiddle coefficient buffer.
  541. * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
  542. * @param[in] onebyfftLen value of 1/fftLen.
  543. * @return none.
  544. */
  545. void arm_radix4_butterfly_inverse_f16(
  546. float16_t * pSrc,
  547. uint16_t fftLen,
  548. const float16_t * pCoef,
  549. uint16_t twidCoefModifier,
  550. float16_t onebyfftLen)
  551. {
  552. float16_t co1, co2, co3, si1, si2, si3;
  553. uint32_t ia1, ia2, ia3;
  554. uint32_t i0, i1, i2, i3;
  555. uint32_t n1, n2, j, k;
  556. #if defined (ARM_MATH_DSP)
  557. float16_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
  558. float16_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc,
  559. Ybminusd;
  560. float16_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
  561. float16_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
  562. float16_t *ptr1;
  563. float16_t p0,p1,p2,p3,p4,p5,p6,p7;
  564. float16_t a0,a1,a2,a3,a4,a5,a6,a7;
  565. /* Initializations for the first stage */
  566. n2 = fftLen;
  567. n1 = n2;
  568. /* n2 = fftLen/4 */
  569. n2 >>= 2U;
  570. i0 = 0U;
  571. ia1 = 0U;
  572. j = n2;
  573. /* Calculation of first stage */
  574. do
  575. {
  576. /* index calculation for the input as, */
  577. /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
  578. i1 = i0 + n2;
  579. i2 = i1 + n2;
  580. i3 = i2 + n2;
  581. /* Butterfly implementation */
  582. xaIn = pSrc[(2U * i0)];
  583. yaIn = pSrc[(2U * i0) + 1U];
  584. xcIn = pSrc[(2U * i2)];
  585. ycIn = pSrc[(2U * i2) + 1U];
  586. xbIn = pSrc[(2U * i1)];
  587. ybIn = pSrc[(2U * i1) + 1U];
  588. xdIn = pSrc[(2U * i3)];
  589. ydIn = pSrc[(2U * i3) + 1U];
  590. /* xa + xc */
  591. Xaplusc = (_Float16)xaIn + (_Float16)xcIn;
  592. /* xb + xd */
  593. Xbplusd = (_Float16)xbIn + (_Float16)xdIn;
  594. /* ya + yc */
  595. Yaplusc = (_Float16)yaIn + (_Float16)ycIn;
  596. /* yb + yd */
  597. Ybplusd = (_Float16)ybIn + (_Float16)ydIn;
  598. /* index calculation for the coefficients */
  599. ia2 = ia1 + ia1;
  600. co2 = pCoef[ia2 * 2U];
  601. si2 = pCoef[(ia2 * 2U) + 1U];
  602. /* xa - xc */
  603. Xaminusc = (_Float16)xaIn - (_Float16)xcIn;
  604. /* xb - xd */
  605. Xbminusd = (_Float16)xbIn - (_Float16)xdIn;
  606. /* ya - yc */
  607. Yaminusc = (_Float16)yaIn - (_Float16)ycIn;
  608. /* yb - yd */
  609. Ybminusd = (_Float16)ybIn - (_Float16)ydIn;
  610. /* xa' = xa + xb + xc + xd */
  611. pSrc[(2U * i0)] = (_Float16)Xaplusc + (_Float16)Xbplusd;
  612. /* ya' = ya + yb + yc + yd */
  613. pSrc[(2U * i0) + 1U] = (_Float16)Yaplusc + (_Float16)Ybplusd;
  614. /* (xa - xc) - (yb - yd) */
  615. Xb12C_out = ((_Float16)Xaminusc - (_Float16)Ybminusd);
  616. /* (ya - yc) + (xb - xd) */
  617. Yb12C_out = ((_Float16)Yaminusc + (_Float16)Xbminusd);
  618. /* (xa + xc) - (xb + xd) */
  619. Xc12C_out = ((_Float16)Xaplusc - (_Float16)Xbplusd);
  620. /* (ya + yc) - (yb + yd) */
  621. Yc12C_out = ((_Float16)Yaplusc - (_Float16)Ybplusd);
  622. /* (xa - xc) + (yb - yd) */
  623. Xd12C_out = ((_Float16)Xaminusc + (_Float16)Ybminusd);
  624. /* (ya - yc) - (xb - xd) */
  625. Yd12C_out = ((_Float16)Yaminusc - (_Float16)Xbminusd);
  626. co1 = pCoef[ia1 * 2U];
  627. si1 = pCoef[(ia1 * 2U) + 1U];
  628. /* index calculation for the coefficients */
  629. ia3 = ia2 + ia1;
  630. co3 = pCoef[ia3 * 2U];
  631. si3 = pCoef[(ia3 * 2U) + 1U];
  632. Xb12_out = (_Float16)Xb12C_out * (_Float16)co1;
  633. Yb12_out = (_Float16)Yb12C_out * (_Float16)co1;
  634. Xc12_out = (_Float16)Xc12C_out * (_Float16)co2;
  635. Yc12_out = (_Float16)Yc12C_out * (_Float16)co2;
  636. Xd12_out = (_Float16)Xd12C_out * (_Float16)co3;
  637. Yd12_out = (_Float16)Yd12C_out * (_Float16)co3;
  638. /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
  639. //Xb12_out -= Yb12C_out * si1;
  640. p0 = (_Float16)Yb12C_out * (_Float16)si1;
  641. /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
  642. //Yb12_out += Xb12C_out * si1;
  643. p1 = (_Float16)Xb12C_out * (_Float16)si1;
  644. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  645. //Xc12_out -= Yc12C_out * si2;
  646. p2 = (_Float16)Yc12C_out * (_Float16)si2;
  647. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  648. //Yc12_out += Xc12C_out * si2;
  649. p3 = (_Float16)Xc12C_out * (_Float16)si2;
  650. /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
  651. //Xd12_out -= Yd12C_out * si3;
  652. p4 = (_Float16)Yd12C_out * (_Float16)si3;
  653. /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
  654. //Yd12_out += Xd12C_out * si3;
  655. p5 =(_Float16) Xd12C_out * (_Float16)si3;
  656. Xb12_out -= (_Float16)p0;
  657. Yb12_out += (_Float16)p1;
  658. Xc12_out -= (_Float16)p2;
  659. Yc12_out += (_Float16)p3;
  660. Xd12_out -= (_Float16)p4;
  661. Yd12_out += (_Float16)p5;
  662. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  663. pSrc[2U * i1] = Xc12_out;
  664. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  665. pSrc[(2U * i1) + 1U] = Yc12_out;
  666. /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
  667. pSrc[2U * i2] = Xb12_out;
  668. /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
  669. pSrc[(2U * i2) + 1U] = Yb12_out;
  670. /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
  671. pSrc[2U * i3] = Xd12_out;
  672. /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
  673. pSrc[(2U * i3) + 1U] = Yd12_out;
  674. /* Twiddle coefficients index modifier */
  675. ia1 = ia1 + twidCoefModifier;
  676. /* Updating input index */
  677. i0 = i0 + 1U;
  678. } while (--j);
  679. twidCoefModifier <<= 2U;
  680. /* Calculation of second stage to excluding last stage */
  681. for (k = fftLen >> 2U; k > 4U; k >>= 2U)
  682. {
  683. /* Initializations for the first stage */
  684. n1 = n2;
  685. n2 >>= 2U;
  686. ia1 = 0U;
  687. /* Calculation of first stage */
  688. j = 0;
  689. do
  690. {
  691. /* index calculation for the coefficients */
  692. ia2 = ia1 + ia1;
  693. ia3 = ia2 + ia1;
  694. co1 = pCoef[ia1 * 2U];
  695. si1 = pCoef[(ia1 * 2U) + 1U];
  696. co2 = pCoef[ia2 * 2U];
  697. si2 = pCoef[(ia2 * 2U) + 1U];
  698. co3 = pCoef[ia3 * 2U];
  699. si3 = pCoef[(ia3 * 2U) + 1U];
  700. /* Twiddle coefficients index modifier */
  701. ia1 = ia1 + twidCoefModifier;
  702. i0 = j;
  703. do
  704. {
  705. /* index calculation for the input as, */
  706. /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
  707. i1 = i0 + n2;
  708. i2 = i1 + n2;
  709. i3 = i2 + n2;
  710. xaIn = pSrc[(2U * i0)];
  711. yaIn = pSrc[(2U * i0) + 1U];
  712. xbIn = pSrc[(2U * i1)];
  713. ybIn = pSrc[(2U * i1) + 1U];
  714. xcIn = pSrc[(2U * i2)];
  715. ycIn = pSrc[(2U * i2) + 1U];
  716. xdIn = pSrc[(2U * i3)];
  717. ydIn = pSrc[(2U * i3) + 1U];
  718. /* xa - xc */
  719. Xaminusc = (_Float16)xaIn - (_Float16)xcIn;
  720. /* (xb - xd) */
  721. Xbminusd = (_Float16)xbIn - (_Float16)xdIn;
  722. /* ya - yc */
  723. Yaminusc = (_Float16)yaIn - (_Float16)ycIn;
  724. /* (yb - yd) */
  725. Ybminusd = (_Float16)ybIn - (_Float16)ydIn;
  726. /* xa + xc */
  727. Xaplusc = (_Float16)xaIn + (_Float16)xcIn;
  728. /* xb + xd */
  729. Xbplusd = (_Float16)xbIn + (_Float16)xdIn;
  730. /* ya + yc */
  731. Yaplusc = (_Float16)yaIn + (_Float16)ycIn;
  732. /* yb + yd */
  733. Ybplusd = (_Float16)ybIn + (_Float16)ydIn;
  734. /* (xa - xc) - (yb - yd) */
  735. Xb12C_out = ((_Float16)Xaminusc - (_Float16)Ybminusd);
  736. /* (ya - yc) + (xb - xd) */
  737. Yb12C_out = ((_Float16)Yaminusc + (_Float16)Xbminusd);
  738. /* xa + xc -(xb + xd) */
  739. Xc12C_out = ((_Float16)Xaplusc - (_Float16)Xbplusd);
  740. /* (ya + yc) - (yb + yd) */
  741. Yc12C_out = ((_Float16)Yaplusc - (_Float16)Ybplusd);
  742. /* (xa - xc) + (yb - yd) */
  743. Xd12C_out = ((_Float16)Xaminusc + (_Float16)Ybminusd);
  744. /* (ya - yc) - (xb - xd) */
  745. Yd12C_out = ((_Float16)Yaminusc - (_Float16)Xbminusd);
  746. pSrc[(2U * i0)] = (_Float16)Xaplusc + (_Float16)Xbplusd;
  747. pSrc[(2U * i0) + 1U] = (_Float16)Yaplusc + (_Float16)Ybplusd;
  748. Xb12_out = (_Float16)Xb12C_out * (_Float16)co1;
  749. Yb12_out = (_Float16)Yb12C_out * (_Float16)co1;
  750. Xc12_out = (_Float16)Xc12C_out * (_Float16)co2;
  751. Yc12_out = (_Float16)Yc12C_out * (_Float16)co2;
  752. Xd12_out = (_Float16)Xd12C_out * (_Float16)co3;
  753. Yd12_out = (_Float16)Yd12C_out * (_Float16)co3;
  754. /* xb' = (xa-yb-xc+yd)co1 - (ya+xb-yc-xd)(si1) */
  755. //Xb12_out -= Yb12C_out * si1;
  756. p0 = (_Float16)Yb12C_out * (_Float16)si1;
  757. /* yb' = (ya+xb-yc-xd)co1 + (xa-yb-xc+yd)(si1) */
  758. //Yb12_out += Xb12C_out * si1;
  759. p1 = (_Float16)Xb12C_out * (_Float16)si1;
  760. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  761. //Xc12_out -= Yc12C_out * si2;
  762. p2 = (_Float16)Yc12C_out * (_Float16)si2;
  763. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  764. //Yc12_out += Xc12C_out * si2;
  765. p3 = (_Float16)Xc12C_out * (_Float16)si2;
  766. /* xd' = (xa+yb-xc-yd)co3 - (ya-xb-yc+xd)(si3) */
  767. //Xd12_out -= Yd12C_out * si3;
  768. p4 = (_Float16)Yd12C_out * (_Float16)si3;
  769. /* yd' = (ya-xb-yc+xd)co3 + (xa+yb-xc-yd)(si3) */
  770. //Yd12_out += Xd12C_out * si3;
  771. p5 = (_Float16)Xd12C_out * (_Float16)si3;
  772. Xb12_out -= (_Float16)p0;
  773. Yb12_out += (_Float16)p1;
  774. Xc12_out -= (_Float16)p2;
  775. Yc12_out += (_Float16)p3;
  776. Xd12_out -= (_Float16)p4;
  777. Yd12_out += (_Float16)p5;
  778. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  779. pSrc[2U * i1] = Xc12_out;
  780. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  781. pSrc[(2U * i1) + 1U] = Yc12_out;
  782. /* xb' = (xa-yb-xc+yd)co1 - (ya+xb-yc-xd)(si1) */
  783. pSrc[2U * i2] = Xb12_out;
  784. /* yb' = (ya+xb-yc-xd)co1 + (xa-yb-xc+yd)(si1) */
  785. pSrc[(2U * i2) + 1U] = Yb12_out;
  786. /* xd' = (xa+yb-xc-yd)co3 - (ya-xb-yc+xd)(si3) */
  787. pSrc[2U * i3] = Xd12_out;
  788. /* yd' = (ya-xb-yc+xd)co3 + (xa+yb-xc-yd)(si3) */
  789. pSrc[(2U * i3) + 1U] = Yd12_out;
  790. i0 += n1;
  791. } while (i0 < fftLen);
  792. j++;
  793. } while (j <= (n2 - 1U));
  794. twidCoefModifier <<= 2U;
  795. }
  796. /* Initializations of last stage */
  797. j = fftLen >> 2;
  798. ptr1 = &pSrc[0];
  799. /* Calculations of last stage */
  800. do
  801. {
  802. xaIn = ptr1[0];
  803. yaIn = ptr1[1];
  804. xbIn = ptr1[2];
  805. ybIn = ptr1[3];
  806. xcIn = ptr1[4];
  807. ycIn = ptr1[5];
  808. xdIn = ptr1[6];
  809. ydIn = ptr1[7];
  810. /* Butterfly implementation */
  811. /* xa + xc */
  812. Xaplusc = (_Float16)xaIn + (_Float16)xcIn;
  813. /* xa - xc */
  814. Xaminusc = (_Float16)xaIn - (_Float16)xcIn;
  815. /* ya + yc */
  816. Yaplusc = (_Float16)yaIn + (_Float16)ycIn;
  817. /* ya - yc */
  818. Yaminusc = (_Float16)yaIn - (_Float16)ycIn;
  819. /* xb + xd */
  820. Xbplusd = (_Float16)xbIn + (_Float16)xdIn;
  821. /* yb + yd */
  822. Ybplusd = (_Float16)ybIn + (_Float16)ydIn;
  823. /* (xb-xd) */
  824. Xbminusd = (_Float16)xbIn - (_Float16)xdIn;
  825. /* (yb-yd) */
  826. Ybminusd = (_Float16)ybIn - (_Float16)ydIn;
  827. /* xa' = (xa+xb+xc+xd) * onebyfftLen */
  828. a0 = ((_Float16)Xaplusc + (_Float16)Xbplusd);
  829. /* ya' = (ya+yb+yc+yd) * onebyfftLen */
  830. a1 = ((_Float16)Yaplusc + (_Float16)Ybplusd);
  831. /* xc' = (xa-xb+xc-xd) * onebyfftLen */
  832. a2 = ((_Float16)Xaplusc - (_Float16)Xbplusd);
  833. /* yc' = (ya-yb+yc-yd) * onebyfftLen */
  834. a3 = ((_Float16)Yaplusc - (_Float16)Ybplusd);
  835. /* xb' = (xa-yb-xc+yd) * onebyfftLen */
  836. a4 = ((_Float16)Xaminusc - (_Float16)Ybminusd);
  837. /* yb' = (ya+xb-yc-xd) * onebyfftLen */
  838. a5 = ((_Float16)Yaminusc + (_Float16)Xbminusd);
  839. /* xd' = (xa+yb-xc-yd) * onebyfftLen */
  840. a6 = ((_Float16)Xaminusc + (_Float16)Ybminusd);
  841. /* yd' = (ya-xb-yc+xd) * onebyfftLen */
  842. a7 = ((_Float16)Yaminusc - (_Float16)Xbminusd);
  843. p0 = (_Float16)a0 * (_Float16)onebyfftLen;
  844. p1 = (_Float16)a1 * (_Float16)onebyfftLen;
  845. p2 = (_Float16)a2 * (_Float16)onebyfftLen;
  846. p3 = (_Float16)a3 * (_Float16)onebyfftLen;
  847. p4 = (_Float16)a4 * (_Float16)onebyfftLen;
  848. p5 = (_Float16)a5 * (_Float16)onebyfftLen;
  849. p6 = (_Float16)a6 * (_Float16)onebyfftLen;
  850. p7 = (_Float16)a7 * (_Float16)onebyfftLen;
  851. /* xa' = (xa+xb+xc+xd) * onebyfftLen */
  852. ptr1[0] = p0;
  853. /* ya' = (ya+yb+yc+yd) * onebyfftLen */
  854. ptr1[1] = p1;
  855. /* xc' = (xa-xb+xc-xd) * onebyfftLen */
  856. ptr1[2] = p2;
  857. /* yc' = (ya-yb+yc-yd) * onebyfftLen */
  858. ptr1[3] = p3;
  859. /* xb' = (xa-yb-xc+yd) * onebyfftLen */
  860. ptr1[4] = p4;
  861. /* yb' = (ya+xb-yc-xd) * onebyfftLen */
  862. ptr1[5] = p5;
  863. /* xd' = (xa+yb-xc-yd) * onebyfftLen */
  864. ptr1[6] = p6;
  865. /* yd' = (ya-xb-yc+xd) * onebyfftLen */
  866. ptr1[7] = p7;
  867. /* increment source pointer by 8 for next calculations */
  868. ptr1 = ptr1 + 8U;
  869. } while (--j);
  870. #else
  871. float16_t t1, t2, r1, r2, s1, s2;
  872. /* Run the below code for Cortex-M0 */
  873. /* Initializations for the first stage */
  874. n2 = fftLen;
  875. n1 = n2;
  876. /* Calculation of all stages except the last */
  877. for (k = fftLen; k > 4U; k >>= 2U)
  878. {
  879. /* Initializations for the current stage */
  880. n1 = n2;
  881. n2 >>= 2U;
  882. ia1 = 0U;
  883. /* Calculation of the current stage */
  884. j = 0;
  885. do
  886. {
  887. /* index calculation for the coefficients */
  888. ia2 = ia1 + ia1;
  889. ia3 = ia2 + ia1;
  890. co1 = pCoef[ia1 * 2U];
  891. si1 = pCoef[(ia1 * 2U) + 1U];
  892. co2 = pCoef[ia2 * 2U];
  893. si2 = pCoef[(ia2 * 2U) + 1U];
  894. co3 = pCoef[ia3 * 2U];
  895. si3 = pCoef[(ia3 * 2U) + 1U];
  896. /* Twiddle coefficients index modifier */
  897. ia1 = ia1 + twidCoefModifier;
  898. i0 = j;
  899. do
  900. {
  901. /* index calculation for the input as, */
  902. /* pSrc[i0 + 0], pSrc[i0 + n2], pSrc[i0 + 2*n2], pSrc[i0 + 3*n2] */
  903. i1 = i0 + n2;
  904. i2 = i1 + n2;
  905. i3 = i2 + n2;
  906. /* xa + xc */
  907. r1 = (_Float16)pSrc[(2U * i0)] + (_Float16)pSrc[(2U * i2)];
  908. /* xa - xc */
  909. r2 = (_Float16)pSrc[(2U * i0)] - (_Float16)pSrc[(2U * i2)];
  910. /* ya + yc */
  911. s1 = (_Float16)pSrc[(2U * i0) + 1U] + (_Float16)pSrc[(2U * i2) + 1U];
  912. /* ya - yc */
  913. s2 = (_Float16)pSrc[(2U * i0) + 1U] - (_Float16)pSrc[(2U * i2) + 1U];
  914. /* xb + xd */
  915. t1 = (_Float16)pSrc[2U * i1] + (_Float16)pSrc[2U * i3];
  916. /* xa' = xa + xb + xc + xd */
  917. pSrc[2U * i0] = (_Float16)r1 + (_Float16)t1;
  918. /* xa + xc -(xb + xd) */
  919. r1 = (_Float16)r1 - (_Float16)t1;
  920. /* yb + yd */
  921. t2 = (_Float16)pSrc[(2U * i1) + 1U] + (_Float16)pSrc[(2U * i3) + 1U];
  922. /* ya' = ya + yb + yc + yd */
  923. pSrc[(2U * i0) + 1U] = (_Float16)s1 + (_Float16)t2;
  924. /* (ya + yc) - (yb + yd) */
  925. s1 = (_Float16)s1 - (_Float16)t2;
  926. /* (yb - yd) */
  927. t1 = (_Float16)pSrc[(2U * i1) + 1U] - (_Float16)pSrc[(2U * i3) + 1U];
  928. /* (xb - xd) */
  929. t2 = (_Float16)pSrc[2U * i1] - (_Float16)pSrc[2U * i3];
  930. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  931. pSrc[2U * i1] = ((_Float16)r1 * (_Float16)co2) - ((_Float16)s1 * (_Float16)si2);
  932. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  933. pSrc[(2U * i1) + 1U] = ((_Float16)s1 * (_Float16)co2) + ((_Float16)r1 * (_Float16)si2);
  934. /* (xa - xc) - (yb - yd) */
  935. r1 = (_Float16)r2 - (_Float16)t1;
  936. /* (xa - xc) + (yb - yd) */
  937. r2 = (_Float16)r2 + (_Float16)t1;
  938. /* (ya - yc) + (xb - xd) */
  939. s1 = (_Float16)s2 + (_Float16)t2;
  940. /* (ya - yc) - (xb - xd) */
  941. s2 = (_Float16)s2 - (_Float16)t2;
  942. /* xb' = (xa-yb-xc+yd)co1 - (ya+xb-yc-xd)(si1) */
  943. pSrc[2U * i2] = ((_Float16)r1 * (_Float16)co1) - ((_Float16)s1 * (_Float16)si1);
  944. /* yb' = (ya+xb-yc-xd)co1 + (xa-yb-xc+yd)(si1) */
  945. pSrc[(2U * i2) + 1U] = ((_Float16)s1 * (_Float16)co1) + ((_Float16)r1 * (_Float16)si1);
  946. /* xd' = (xa+yb-xc-yd)co3 - (ya-xb-yc+xd)(si3) */
  947. pSrc[2U * i3] = ((_Float16)r2 * (_Float16)co3) - ((_Float16)s2 * (_Float16)si3);
  948. /* yd' = (ya-xb-yc+xd)co3 + (xa+yb-xc-yd)(si3) */
  949. pSrc[(2U * i3) + 1U] = ((_Float16)s2 * (_Float16)co3) + ((_Float16)r2 * (_Float16)si3);
  950. i0 += n1;
  951. } while ( i0 < fftLen);
  952. j++;
  953. } while (j <= (n2 - 1U));
  954. twidCoefModifier <<= 2U;
  955. }
  956. /* Initializations of last stage */
  957. n1 = n2;
  958. n2 >>= 2U;
  959. /* Calculations of last stage */
  960. for (i0 = 0U; i0 <= (fftLen - n1); i0 += n1)
  961. {
  962. /* index calculation for the input as, */
  963. /* pSrc[i0 + 0], pSrc[i0 + n2], pSrc[i0 + 2*n2], pSrc[i0 + 3*n2] */
  964. i1 = i0 + n2;
  965. i2 = i1 + n2;
  966. i3 = i2 + n2;
  967. /* Butterfly implementation */
  968. /* xa + xc */
  969. r1 = (_Float16)pSrc[2U * i0] + (_Float16)pSrc[2U * i2];
  970. /* xa - xc */
  971. r2 = (_Float16)pSrc[2U * i0] - (_Float16)pSrc[2U * i2];
  972. /* ya + yc */
  973. s1 = (_Float16)pSrc[(2U * i0) + 1U] + (_Float16)pSrc[(2U * i2) + 1U];
  974. /* ya - yc */
  975. s2 = (_Float16)pSrc[(2U * i0) + 1U] - (_Float16)pSrc[(2U * i2) + 1U];
  976. /* xb + xd */
  977. t1 = (_Float16)pSrc[2U * i1] + (_Float16)pSrc[2U * i3];
  978. /* xa' = (xa + xb + xc + xd) * onebyfftLen */
  979. pSrc[2U * i0] = ((_Float16)r1 + (_Float16)t1) * (_Float16)onebyfftLen;
  980. /* (xa + xc) - (xb + xd) */
  981. r1 = (_Float16)r1 - (_Float16)t1;
  982. /* yb + yd */
  983. t2 = (_Float16)pSrc[(2U * i1) + 1U] + (_Float16)pSrc[(2U * i3) + 1U];
  984. /* ya' = (ya + yb + yc + yd) * onebyfftLen */
  985. pSrc[(2U * i0) + 1U] = ((_Float16)s1 + (_Float16)t2) * (_Float16)onebyfftLen;
  986. /* (ya + yc) - (yb + yd) */
  987. s1 = (_Float16)s1 - (_Float16)t2;
  988. /* (yb-yd) */
  989. t1 = (_Float16)pSrc[(2U * i1) + 1U] - (_Float16)pSrc[(2U * i3) + 1U];
  990. /* (xb-xd) */
  991. t2 = (_Float16)pSrc[2U * i1] - (_Float16)pSrc[2U * i3];
  992. /* xc' = (xa-xb+xc-xd) * onebyfftLen */
  993. pSrc[2U * i1] = (_Float16)r1 * (_Float16)onebyfftLen;
  994. /* yc' = (ya-yb+yc-yd) * onebyfftLen */
  995. pSrc[(2U * i1) + 1U] = (_Float16)s1 * (_Float16)onebyfftLen;
  996. /* (xa - xc) - (yb-yd) */
  997. r1 = (_Float16)r2 - (_Float16)t1;
  998. /* (xa - xc) + (yb-yd) */
  999. r2 = (_Float16)r2 + (_Float16)t1;
  1000. /* (ya - yc) + (xb-xd) */
  1001. s1 = (_Float16)s2 + (_Float16)t2;
  1002. /* (ya - yc) - (xb-xd) */
  1003. s2 = (_Float16)s2 - (_Float16)t2;
  1004. /* xb' = (xa-yb-xc+yd) * onebyfftLen */
  1005. pSrc[2U * i2] = (_Float16)r1 * (_Float16)onebyfftLen;
  1006. /* yb' = (ya+xb-yc-xd) * onebyfftLen */
  1007. pSrc[(2U * i2) + 1U] = (_Float16)s1 * (_Float16)onebyfftLen;
  1008. /* xd' = (xa+yb-xc-yd) * onebyfftLen */
  1009. pSrc[2U * i3] = (_Float16)r2 * (_Float16)onebyfftLen;
  1010. /* yd' = (ya-xb-yc+xd) * onebyfftLen */
  1011. pSrc[(2U * i3) + 1U] = (_Float16)s2 * (_Float16)onebyfftLen;
  1012. }
  1013. #endif /* #if defined (ARM_MATH_DSP) */
  1014. }
  1015. #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */