arm_mat_inverse_f32.c 49 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570
  1. /* ----------------------------------------------------------------------
  2. * Project: CMSIS DSP Library
  3. * Title: arm_mat_inverse_f32.c
  4. * Description: Floating-point matrix inverse
  5. *
  6. * $Date: 23 April 2021
  7. * $Revision: V1.9.0
  8. *
  9. * Target Processor: Cortex-M and Cortex-A cores
  10. * -------------------------------------------------------------------- */
  11. /*
  12. * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  13. *
  14. * SPDX-License-Identifier: Apache-2.0
  15. *
  16. * Licensed under the Apache License, Version 2.0 (the License); you may
  17. * not use this file except in compliance with the License.
  18. * You may obtain a copy of the License at
  19. *
  20. * www.apache.org/licenses/LICENSE-2.0
  21. *
  22. * Unless required by applicable law or agreed to in writing, software
  23. * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  24. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  25. * See the License for the specific language governing permissions and
  26. * limitations under the License.
  27. */
  28. #include "dsp/matrix_functions.h"
  29. /**
  30. @ingroup groupMatrix
  31. */
  32. /**
  33. @defgroup MatrixInv Matrix Inverse
  34. Computes the inverse of a matrix.
  35. The inverse is defined only if the input matrix is square and non-singular (the determinant is non-zero).
  36. The function checks that the input and output matrices are square and of the same size.
  37. Matrix inversion is numerically sensitive and the CMSIS DSP library only supports matrix
  38. inversion of floating-point matrices.
  39. @par Algorithm
  40. The Gauss-Jordan method is used to find the inverse.
  41. The algorithm performs a sequence of elementary row-operations until it
  42. reduces the input matrix to an identity matrix. Applying the same sequence
  43. of elementary row-operations to an identity matrix yields the inverse matrix.
  44. If the input matrix is singular, then the algorithm terminates and returns error status
  45. <code>ARM_MATH_SINGULAR</code>.
  46. \image html MatrixInverse.gif "Matrix Inverse of a 3 x 3 matrix using Gauss-Jordan Method"
  47. */
  48. /**
  49. @addtogroup MatrixInv
  50. @{
  51. */
  52. /**
  53. @brief Floating-point matrix inverse.
  54. @param[in] pSrc points to input matrix structure. The source matrix is modified by the function.
  55. @param[out] pDst points to output matrix structure
  56. @return execution status
  57. - \ref ARM_MATH_SUCCESS : Operation successful
  58. - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
  59. - \ref ARM_MATH_SINGULAR : Input matrix is found to be singular (non-invertible)
  60. */
  61. #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
  62. arm_status arm_mat_inverse_f32(
  63. const arm_matrix_instance_f32 * pSrc,
  64. arm_matrix_instance_f32 * pDst)
  65. {
  66. float32_t *pIn = pSrc->pData; /* input data matrix pointer */
  67. float32_t *pOut = pDst->pData; /* output data matrix pointer */
  68. float32_t *pInT1, *pInT2; /* Temporary input data matrix pointer */
  69. float32_t *pOutT1, *pOutT2; /* Temporary output data matrix pointer */
  70. float32_t *pPivotRowIn, *pPRT_in, *pPivotRowDst, *pPRT_pDst; /* Temporary input and output data matrix pointer */
  71. uint32_t numRows = pSrc->numRows; /* Number of rows in the matrix */
  72. uint32_t numCols = pSrc->numCols; /* Number of Cols in the matrix */
  73. float32_t *pTmpA, *pTmpB;
  74. float32_t in = 0.0f; /* Temporary input values */
  75. uint32_t i, rowCnt, flag = 0U, j, loopCnt, l; /* loop counters */
  76. arm_status status; /* status of matrix inverse */
  77. uint32_t blkCnt;
  78. #ifdef ARM_MATH_MATRIX_CHECK
  79. /* Check for matrix mismatch condition */
  80. if ((pSrc->numRows != pSrc->numCols) || (pDst->numRows != pDst->numCols)
  81. || (pSrc->numRows != pDst->numRows))
  82. {
  83. /* Set status as ARM_MATH_SIZE_MISMATCH */
  84. status = ARM_MATH_SIZE_MISMATCH;
  85. }
  86. else
  87. #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
  88. {
  89. /*--------------------------------------------------------------------------------------------------------------
  90. * Matrix Inverse can be solved using elementary row operations.
  91. *
  92. * Gauss-Jordan Method:
  93. *
  94. * 1. First combine the identity matrix and the input matrix separated by a bar to form an
  95. * augmented matrix as follows:
  96. * _ _ _ _ _ _ _ _
  97. * | | a11 a12 | | | 1 0 | | | X11 X12 |
  98. * | | | | | | | = | |
  99. * |_ |_ a21 a22 _| | |_0 1 _| _| |_ X21 X21 _|
  100. *
  101. * 2. In our implementation, pDst Matrix is used as identity matrix.
  102. *
  103. * 3. Begin with the first row. Let i = 1.
  104. *
  105. * 4. Check to see if the pivot for row i is zero.
  106. * The pivot is the element of the main diagonal that is on the current row.
  107. * For instance, if working with row i, then the pivot element is aii.
  108. * If the pivot is zero, exchange that row with a row below it that does not
  109. * contain a zero in column i. If this is not possible, then an inverse
  110. * to that matrix does not exist.
  111. *
  112. * 5. Divide every element of row i by the pivot.
  113. *
  114. * 6. For every row below and row i, replace that row with the sum of that row and
  115. * a multiple of row i so that each new element in column i below row i is zero.
  116. *
  117. * 7. Move to the next row and column and repeat steps 2 through 5 until you have zeros
  118. * for every element below and above the main diagonal.
  119. *
  120. * 8. Now an identical matrix is formed to the left of the bar(input matrix, src).
  121. * Therefore, the matrix to the right of the bar is our solution(dst matrix, dst).
  122. *----------------------------------------------------------------------------------------------------------------*/
  123. /*
  124. * Working pointer for destination matrix
  125. */
  126. pOutT1 = pOut;
  127. /*
  128. * Loop over the number of rows
  129. */
  130. rowCnt = numRows;
  131. /*
  132. * Making the destination matrix as identity matrix
  133. */
  134. while (rowCnt > 0U)
  135. {
  136. /*
  137. * Writing all zeroes in lower triangle of the destination matrix
  138. */
  139. j = numRows - rowCnt;
  140. while (j > 0U)
  141. {
  142. *pOutT1++ = 0.0f;
  143. j--;
  144. }
  145. /*
  146. * Writing all ones in the diagonal of the destination matrix
  147. */
  148. *pOutT1++ = 1.0f;
  149. /*
  150. * Writing all zeroes in upper triangle of the destination matrix
  151. */
  152. j = rowCnt - 1U;
  153. while (j > 0U)
  154. {
  155. *pOutT1++ = 0.0f;
  156. j--;
  157. }
  158. /*
  159. * Decrement the loop counter
  160. */
  161. rowCnt--;
  162. }
  163. /*
  164. * Loop over the number of columns of the input matrix.
  165. * All the elements in each column are processed by the row operations
  166. */
  167. loopCnt = numCols;
  168. /*
  169. * Index modifier to navigate through the columns
  170. */
  171. l = 0U;
  172. while (loopCnt > 0U)
  173. {
  174. /*
  175. * Check if the pivot element is zero..
  176. * If it is zero then interchange the row with non zero row below.
  177. * If there is no non zero element to replace in the rows below,
  178. * then the matrix is Singular.
  179. */
  180. /*
  181. * Working pointer for the input matrix that points
  182. * * to the pivot element of the particular row
  183. */
  184. pInT1 = pIn + (l * numCols);
  185. /*
  186. * Working pointer for the destination matrix that points
  187. * * to the pivot element of the particular row
  188. */
  189. pOutT1 = pOut + (l * numCols);
  190. /*
  191. * Temporary variable to hold the pivot value
  192. */
  193. in = *pInT1;
  194. /*
  195. * Check if the pivot element is zero
  196. */
  197. if (*pInT1 == 0.0f)
  198. {
  199. /*
  200. * Loop over the number rows present below
  201. */
  202. for (i = 1U; i < numRows-l; i++)
  203. {
  204. /*
  205. * Update the input and destination pointers
  206. */
  207. pInT2 = pInT1 + (numCols * i);
  208. pOutT2 = pOutT1 + (numCols * i);
  209. /*
  210. * Check if there is a non zero pivot element to
  211. * * replace in the rows below
  212. */
  213. if (*pInT2 != 0.0f)
  214. {
  215. f32x4_t vecA, vecB;
  216. /*
  217. * Loop over number of columns
  218. * * to the right of the pilot element
  219. */
  220. pTmpA = pInT1;
  221. pTmpB = pInT2;
  222. blkCnt = (numCols - l) >> 2;
  223. while (blkCnt > 0U)
  224. {
  225. vecA = vldrwq_f32(pTmpA);
  226. vecB = vldrwq_f32(pTmpB);
  227. vstrwq_f32(pTmpB, vecA);
  228. vstrwq_f32(pTmpA, vecB);
  229. pTmpA += 4;
  230. pTmpB += 4;
  231. /*
  232. * Decrement the blockSize loop counter
  233. */
  234. blkCnt--;
  235. }
  236. /*
  237. * tail
  238. * (will be merged thru tail predication)
  239. */
  240. blkCnt = (numCols - l) & 3;
  241. if (blkCnt > 0U)
  242. {
  243. mve_pred16_t p0 = vctp32q(blkCnt);
  244. vecA = vldrwq_f32(pTmpA);
  245. vecB = vldrwq_f32(pTmpB);
  246. vstrwq_p_f32(pTmpB, vecA, p0);
  247. vstrwq_p_f32(pTmpA, vecB, p0);
  248. }
  249. pInT1 += numCols - l;
  250. pInT2 += numCols - l;
  251. pTmpA = pOutT1;
  252. pTmpB = pOutT2;
  253. blkCnt = numCols >> 2;
  254. while (blkCnt > 0U)
  255. {
  256. vecA = vldrwq_f32(pTmpA);
  257. vecB = vldrwq_f32(pTmpB);
  258. vstrwq_f32(pTmpB, vecA);
  259. vstrwq_f32(pTmpA, vecB);
  260. pTmpA += 4;
  261. pTmpB += 4;
  262. /*
  263. * Decrement the blockSize loop counter
  264. */
  265. blkCnt--;
  266. }
  267. /*
  268. * tail
  269. */
  270. blkCnt = numCols & 3;
  271. if (blkCnt > 0U)
  272. {
  273. mve_pred16_t p0 = vctp32q(blkCnt);
  274. vecA = vldrwq_f32(pTmpA);
  275. vecB = vldrwq_f32(pTmpB);
  276. vstrwq_p_f32(pTmpB, vecA, p0);
  277. vstrwq_p_f32(pTmpA, vecB, p0);
  278. }
  279. pOutT1 += numCols;
  280. pOutT2 += numCols;
  281. /*
  282. * Flag to indicate whether exchange is done or not
  283. */
  284. flag = 1U;
  285. /*
  286. * Break after exchange is done
  287. */
  288. break;
  289. }
  290. }
  291. }
  292. /*
  293. * Update the status if the matrix is singular
  294. */
  295. if ((flag != 1U) && (in == 0.0f))
  296. {
  297. return ARM_MATH_SINGULAR;
  298. }
  299. /*
  300. * Points to the pivot row of input and destination matrices
  301. */
  302. pPivotRowIn = pIn + (l * numCols);
  303. pPivotRowDst = pOut + (l * numCols);
  304. /*
  305. * Temporary pointers to the pivot row pointers
  306. */
  307. pInT1 = pPivotRowIn;
  308. pOutT1 = pPivotRowDst;
  309. /*
  310. * Pivot element of the row
  311. */
  312. in = *(pIn + (l * numCols));
  313. pTmpA = pInT1;
  314. f32x4_t invIn = vdupq_n_f32(1.0f / in);
  315. blkCnt = (numCols - l) >> 2;
  316. f32x4_t vecA;
  317. while (blkCnt > 0U)
  318. {
  319. *(f32x4_t *) pTmpA = *(f32x4_t *) pTmpA * invIn;
  320. pTmpA += 4;
  321. /*
  322. * Decrement the blockSize loop counter
  323. */
  324. blkCnt--;
  325. }
  326. /*
  327. * tail
  328. */
  329. blkCnt = (numCols - l) & 3;
  330. if (blkCnt > 0U)
  331. {
  332. mve_pred16_t p0 = vctp32q(blkCnt);
  333. vecA = vldrwq_f32(pTmpA);
  334. vecA = vecA * invIn;
  335. vstrwq_p_f32(pTmpA, vecA, p0);
  336. }
  337. pInT1 += numCols - l;
  338. /*
  339. * Loop over number of columns
  340. * * to the right of the pilot element
  341. */
  342. pTmpA = pOutT1;
  343. blkCnt = numCols >> 2;
  344. while (blkCnt > 0U)
  345. {
  346. *(f32x4_t *) pTmpA = *(f32x4_t *) pTmpA *invIn;
  347. pTmpA += 4;
  348. /*
  349. * Decrement the blockSize loop counter
  350. */
  351. blkCnt--;
  352. }
  353. /*
  354. * tail
  355. * (will be merged thru tail predication)
  356. */
  357. blkCnt = numCols & 3;
  358. if (blkCnt > 0U)
  359. {
  360. mve_pred16_t p0 = vctp32q(blkCnt);
  361. vecA = vldrwq_f32(pTmpA);
  362. vecA = vecA * invIn;
  363. vstrwq_p_f32(pTmpA, vecA, p0);
  364. }
  365. pOutT1 += numCols;
  366. /*
  367. * Replace the rows with the sum of that row and a multiple of row i
  368. * * so that each new element in column i above row i is zero.
  369. */
  370. /*
  371. * Temporary pointers for input and destination matrices
  372. */
  373. pInT1 = pIn;
  374. pOutT1 = pOut;
  375. for (i = 0U; i < numRows; i++)
  376. {
  377. /*
  378. * Check for the pivot element
  379. */
  380. if (i == l)
  381. {
  382. /*
  383. * If the processing element is the pivot element,
  384. * only the columns to the right are to be processed
  385. */
  386. pInT1 += numCols - l;
  387. pOutT1 += numCols;
  388. }
  389. else
  390. {
  391. /*
  392. * Element of the reference row
  393. */
  394. /*
  395. * Working pointers for input and destination pivot rows
  396. */
  397. pPRT_in = pPivotRowIn;
  398. pPRT_pDst = pPivotRowDst;
  399. /*
  400. * Loop over the number of columns to the right of the pivot element,
  401. * to replace the elements in the input matrix
  402. */
  403. in = *pInT1;
  404. f32x4_t tmpV = vdupq_n_f32(in);
  405. blkCnt = (numCols - l) >> 2;
  406. while (blkCnt > 0U)
  407. {
  408. f32x4_t vec1, vec2;
  409. /*
  410. * Replace the element by the sum of that row
  411. * and a multiple of the reference row
  412. */
  413. vec1 = vldrwq_f32(pInT1);
  414. vec2 = vldrwq_f32(pPRT_in);
  415. vec1 = vfmsq_f32(vec1, tmpV, vec2);
  416. vstrwq_f32(pInT1, vec1);
  417. pPRT_in += 4;
  418. pInT1 += 4;
  419. /*
  420. * Decrement the blockSize loop counter
  421. */
  422. blkCnt--;
  423. }
  424. /*
  425. * tail
  426. * (will be merged thru tail predication)
  427. */
  428. blkCnt = (numCols - l) & 3;
  429. if (blkCnt > 0U)
  430. {
  431. f32x4_t vec1, vec2;
  432. mve_pred16_t p0 = vctp32q(blkCnt);
  433. vec1 = vldrwq_f32(pInT1);
  434. vec2 = vldrwq_f32(pPRT_in);
  435. vec1 = vfmsq_f32(vec1, tmpV, vec2);
  436. vstrwq_p_f32(pInT1, vec1, p0);
  437. pInT1 += blkCnt;
  438. }
  439. blkCnt = numCols >> 2;
  440. while (blkCnt > 0U)
  441. {
  442. f32x4_t vec1, vec2;
  443. /*
  444. * Replace the element by the sum of that row
  445. * and a multiple of the reference row
  446. */
  447. vec1 = vldrwq_f32(pOutT1);
  448. vec2 = vldrwq_f32(pPRT_pDst);
  449. vec1 = vfmsq_f32(vec1, tmpV, vec2);
  450. vstrwq_f32(pOutT1, vec1);
  451. pPRT_pDst += 4;
  452. pOutT1 += 4;
  453. /*
  454. * Decrement the blockSize loop counter
  455. */
  456. blkCnt--;
  457. }
  458. /*
  459. * tail
  460. * (will be merged thru tail predication)
  461. */
  462. blkCnt = numCols & 3;
  463. if (blkCnt > 0U)
  464. {
  465. f32x4_t vec1, vec2;
  466. mve_pred16_t p0 = vctp32q(blkCnt);
  467. vec1 = vldrwq_f32(pOutT1);
  468. vec2 = vldrwq_f32(pPRT_pDst);
  469. vec1 = vfmsq_f32(vec1, tmpV, vec2);
  470. vstrwq_p_f32(pOutT1, vec1, p0);
  471. pInT2 += blkCnt;
  472. pOutT1 += blkCnt;
  473. }
  474. }
  475. /*
  476. * Increment the temporary input pointer
  477. */
  478. pInT1 = pInT1 + l;
  479. }
  480. /*
  481. * Increment the input pointer
  482. */
  483. pIn++;
  484. /*
  485. * Decrement the loop counter
  486. */
  487. loopCnt--;
  488. /*
  489. * Increment the index modifier
  490. */
  491. l++;
  492. }
  493. /*
  494. * Set status as ARM_MATH_SUCCESS
  495. */
  496. status = ARM_MATH_SUCCESS;
  497. if ((flag != 1U) && (in == 0.0f))
  498. {
  499. pIn = pSrc->pData;
  500. for (i = 0; i < numRows * numCols; i++)
  501. {
  502. if (pIn[i] != 0.0f)
  503. break;
  504. }
  505. if (i == numRows * numCols)
  506. status = ARM_MATH_SINGULAR;
  507. }
  508. }
  509. /* Return to application */
  510. return (status);
  511. }
  512. #else
  513. #if defined(ARM_MATH_NEON)
  514. arm_status arm_mat_inverse_f32(
  515. const arm_matrix_instance_f32 * pSrc,
  516. arm_matrix_instance_f32 * pDst)
  517. {
  518. float32_t *pIn = pSrc->pData; /* input data matrix pointer */
  519. float32_t *pOut = pDst->pData; /* output data matrix pointer */
  520. float32_t *pInT1, *pInT2; /* Temporary input data matrix pointer */
  521. float32_t *pOutT1, *pOutT2; /* Temporary output data matrix pointer */
  522. float32_t *pPivotRowIn, *pPRT_in, *pPivotRowDst, *pPRT_pDst; /* Temporary input and output data matrix pointer */
  523. uint32_t numRows = pSrc->numRows; /* Number of rows in the matrix */
  524. uint32_t numCols = pSrc->numCols; /* Number of Cols in the matrix */
  525. float32_t Xchg, in = 0.0f, in1; /* Temporary input values */
  526. uint32_t i, rowCnt, flag = 0U, j, loopCnt, k, l; /* loop counters */
  527. arm_status status; /* status of matrix inverse */
  528. float32x4_t vec1;
  529. float32x4_t vec2;
  530. float32x4_t tmpV;
  531. #ifdef ARM_MATH_MATRIX_CHECK
  532. /* Check for matrix mismatch condition */
  533. if ((pSrc->numRows != pSrc->numCols) || (pDst->numRows != pDst->numCols)
  534. || (pSrc->numRows != pDst->numRows))
  535. {
  536. /* Set status as ARM_MATH_SIZE_MISMATCH */
  537. status = ARM_MATH_SIZE_MISMATCH;
  538. }
  539. else
  540. #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
  541. {
  542. /*--------------------------------------------------------------------------------------------------------------
  543. * Matrix Inverse can be solved using elementary row operations.
  544. *
  545. * Gauss-Jordan Method:
  546. *
  547. * 1. First combine the identity matrix and the input matrix separated by a bar to form an
  548. * augmented matrix as follows:
  549. * _ _ _ _
  550. * | a11 a12 | 1 0 | | X11 X12 |
  551. * | | | = | |
  552. * |_ a21 a22 | 0 1 _| |_ X21 X21 _|
  553. *
  554. * 2. In our implementation, pDst Matrix is used as identity matrix.
  555. *
  556. * 3. Begin with the first row. Let i = 1.
  557. *
  558. * 4. Check to see if the pivot for row i is zero.
  559. * The pivot is the element of the main diagonal that is on the current row.
  560. * For instance, if working with row i, then the pivot element is aii.
  561. * If the pivot is zero, exchange that row with a row below it that does not
  562. * contain a zero in column i. If this is not possible, then an inverse
  563. * to that matrix does not exist.
  564. *
  565. * 5. Divide every element of row i by the pivot.
  566. *
  567. * 6. For every row below and row i, replace that row with the sum of that row and
  568. * a multiple of row i so that each new element in column i below row i is zero.
  569. *
  570. * 7. Move to the next row and column and repeat steps 2 through 5 until you have zeros
  571. * for every element below and above the main diagonal.
  572. *
  573. * 8. Now an identical matrix is formed to the left of the bar(input matrix, pSrc).
  574. * Therefore, the matrix to the right of the bar is our solution(pDst matrix, pDst).
  575. *----------------------------------------------------------------------------------------------------------------*/
  576. /* Working pointer for destination matrix */
  577. pOutT1 = pOut;
  578. /* Loop over the number of rows */
  579. rowCnt = numRows;
  580. /* Making the destination matrix as identity matrix */
  581. while (rowCnt > 0U)
  582. {
  583. /* Writing all zeroes in lower triangle of the destination matrix */
  584. j = numRows - rowCnt;
  585. while (j > 0U)
  586. {
  587. *pOutT1++ = 0.0f;
  588. j--;
  589. }
  590. /* Writing all ones in the diagonal of the destination matrix */
  591. *pOutT1++ = 1.0f;
  592. /* Writing all zeroes in upper triangle of the destination matrix */
  593. j = rowCnt - 1U;
  594. while (j > 0U)
  595. {
  596. *pOutT1++ = 0.0f;
  597. j--;
  598. }
  599. /* Decrement the loop counter */
  600. rowCnt--;
  601. }
  602. /* Loop over the number of columns of the input matrix.
  603. All the elements in each column are processed by the row operations */
  604. loopCnt = numCols;
  605. /* Index modifier to navigate through the columns */
  606. l = 0U;
  607. while (loopCnt > 0U)
  608. {
  609. /* Check if the pivot element is zero..
  610. * If it is zero then interchange the row with non zero row below.
  611. * If there is no non zero element to replace in the rows below,
  612. * then the matrix is Singular. */
  613. /* Working pointer for the input matrix that points
  614. * to the pivot element of the particular row */
  615. pInT1 = pIn + (l * numCols);
  616. /* Working pointer for the destination matrix that points
  617. * to the pivot element of the particular row */
  618. pOutT1 = pOut + (l * numCols);
  619. /* Temporary variable to hold the pivot value */
  620. in = *pInT1;
  621. /* Check if the pivot element is zero */
  622. if (*pInT1 == 0.0f)
  623. {
  624. /* Loop over the number rows present below */
  625. for (i = 1U; i < numRows - l; i++)
  626. {
  627. /* Update the input and destination pointers */
  628. pInT2 = pInT1 + (numCols * i);
  629. pOutT2 = pOutT1 + (numCols * i);
  630. /* Check if there is a non zero pivot element to
  631. * replace in the rows below */
  632. if (*pInT2 != 0.0f)
  633. {
  634. /* Loop over number of columns
  635. * to the right of the pilot element */
  636. j = numCols - l;
  637. while (j > 0U)
  638. {
  639. /* Exchange the row elements of the input matrix */
  640. Xchg = *pInT2;
  641. *pInT2++ = *pInT1;
  642. *pInT1++ = Xchg;
  643. /* Decrement the loop counter */
  644. j--;
  645. }
  646. /* Loop over number of columns of the destination matrix */
  647. j = numCols;
  648. while (j > 0U)
  649. {
  650. /* Exchange the row elements of the destination matrix */
  651. Xchg = *pOutT2;
  652. *pOutT2++ = *pOutT1;
  653. *pOutT1++ = Xchg;
  654. /* Decrement the loop counter */
  655. j--;
  656. }
  657. /* Flag to indicate whether exchange is done or not */
  658. flag = 1U;
  659. /* Break after exchange is done */
  660. break;
  661. }
  662. }
  663. }
  664. /* Update the status if the matrix is singular */
  665. if ((flag != 1U) && (in == 0.0f))
  666. {
  667. return ARM_MATH_SINGULAR;
  668. }
  669. /* Points to the pivot row of input and destination matrices */
  670. pPivotRowIn = pIn + (l * numCols);
  671. pPivotRowDst = pOut + (l * numCols);
  672. /* Temporary pointers to the pivot row pointers */
  673. pInT1 = pPivotRowIn;
  674. pInT2 = pPivotRowDst;
  675. /* Pivot element of the row */
  676. in = *pPivotRowIn;
  677. tmpV = vdupq_n_f32(1.0f/in);
  678. /* Loop over number of columns
  679. * to the right of the pilot element */
  680. j = (numCols - l) >> 2;
  681. while (j > 0U)
  682. {
  683. /* Divide each element of the row of the input matrix
  684. * by the pivot element */
  685. vec1 = vld1q_f32(pInT1);
  686. vec1 = vmulq_f32(vec1, tmpV);
  687. vst1q_f32(pInT1, vec1);
  688. pInT1 += 4;
  689. /* Decrement the loop counter */
  690. j--;
  691. }
  692. /* Tail */
  693. j = (numCols - l) & 3;
  694. while (j > 0U)
  695. {
  696. /* Divide each element of the row of the input matrix
  697. * by the pivot element */
  698. in1 = *pInT1;
  699. *pInT1++ = in1 / in;
  700. /* Decrement the loop counter */
  701. j--;
  702. }
  703. /* Loop over number of columns of the destination matrix */
  704. j = numCols >> 2;
  705. while (j > 0U)
  706. {
  707. /* Divide each element of the row of the destination matrix
  708. * by the pivot element */
  709. vec1 = vld1q_f32(pInT2);
  710. vec1 = vmulq_f32(vec1, tmpV);
  711. vst1q_f32(pInT2, vec1);
  712. pInT2 += 4;
  713. /* Decrement the loop counter */
  714. j--;
  715. }
  716. /* Tail */
  717. j = numCols & 3;
  718. while (j > 0U)
  719. {
  720. /* Divide each element of the row of the destination matrix
  721. * by the pivot element */
  722. in1 = *pInT2;
  723. *pInT2++ = in1 / in;
  724. /* Decrement the loop counter */
  725. j--;
  726. }
  727. /* Replace the rows with the sum of that row and a multiple of row i
  728. * so that each new element in column i above row i is zero.*/
  729. /* Temporary pointers for input and destination matrices */
  730. pInT1 = pIn;
  731. pInT2 = pOut;
  732. /* index used to check for pivot element */
  733. i = 0U;
  734. /* Loop over number of rows */
  735. /* to be replaced by the sum of that row and a multiple of row i */
  736. k = numRows;
  737. while (k > 0U)
  738. {
  739. /* Check for the pivot element */
  740. if (i == l)
  741. {
  742. /* If the processing element is the pivot element,
  743. only the columns to the right are to be processed */
  744. pInT1 += numCols - l;
  745. pInT2 += numCols;
  746. }
  747. else
  748. {
  749. /* Element of the reference row */
  750. in = *pInT1;
  751. tmpV = vdupq_n_f32(in);
  752. /* Working pointers for input and destination pivot rows */
  753. pPRT_in = pPivotRowIn;
  754. pPRT_pDst = pPivotRowDst;
  755. /* Loop over the number of columns to the right of the pivot element,
  756. to replace the elements in the input matrix */
  757. j = (numCols - l) >> 2;
  758. while (j > 0U)
  759. {
  760. /* Replace the element by the sum of that row
  761. and a multiple of the reference row */
  762. vec1 = vld1q_f32(pInT1);
  763. vec2 = vld1q_f32(pPRT_in);
  764. vec1 = vmlsq_f32(vec1, tmpV, vec2);
  765. vst1q_f32(pInT1, vec1);
  766. pPRT_in += 4;
  767. pInT1 += 4;
  768. /* Decrement the loop counter */
  769. j--;
  770. }
  771. /* Tail */
  772. j = (numCols - l) & 3;
  773. while (j > 0U)
  774. {
  775. /* Replace the element by the sum of that row
  776. and a multiple of the reference row */
  777. in1 = *pInT1;
  778. *pInT1++ = in1 - (in * *pPRT_in++);
  779. /* Decrement the loop counter */
  780. j--;
  781. }
  782. /* Loop over the number of columns to
  783. replace the elements in the destination matrix */
  784. j = numCols >> 2;
  785. while (j > 0U)
  786. {
  787. /* Replace the element by the sum of that row
  788. and a multiple of the reference row */
  789. vec1 = vld1q_f32(pInT2);
  790. vec2 = vld1q_f32(pPRT_pDst);
  791. vec1 = vmlsq_f32(vec1, tmpV, vec2);
  792. vst1q_f32(pInT2, vec1);
  793. pPRT_pDst += 4;
  794. pInT2 += 4;
  795. /* Decrement the loop counter */
  796. j--;
  797. }
  798. /* Tail */
  799. j = numCols & 3;
  800. while (j > 0U)
  801. {
  802. /* Replace the element by the sum of that row
  803. and a multiple of the reference row */
  804. in1 = *pInT2;
  805. *pInT2++ = in1 - (in * *pPRT_pDst++);
  806. /* Decrement the loop counter */
  807. j--;
  808. }
  809. }
  810. /* Increment the temporary input pointer */
  811. pInT1 = pInT1 + l;
  812. /* Decrement the loop counter */
  813. k--;
  814. /* Increment the pivot index */
  815. i++;
  816. }
  817. /* Increment the input pointer */
  818. pIn++;
  819. /* Decrement the loop counter */
  820. loopCnt--;
  821. /* Increment the index modifier */
  822. l++;
  823. }
  824. /* Set status as ARM_MATH_SUCCESS */
  825. status = ARM_MATH_SUCCESS;
  826. if ((flag != 1U) && (in == 0.0f))
  827. {
  828. pIn = pSrc->pData;
  829. for (i = 0; i < numRows * numCols; i++)
  830. {
  831. if (pIn[i] != 0.0f)
  832. break;
  833. }
  834. if (i == numRows * numCols)
  835. status = ARM_MATH_SINGULAR;
  836. }
  837. }
  838. /* Return to application */
  839. return (status);
  840. }
  841. #else
  842. arm_status arm_mat_inverse_f32(
  843. const arm_matrix_instance_f32 * pSrc,
  844. arm_matrix_instance_f32 * pDst)
  845. {
  846. float32_t *pIn = pSrc->pData; /* input data matrix pointer */
  847. float32_t *pOut = pDst->pData; /* output data matrix pointer */
  848. float32_t *pInT1, *pInT2; /* Temporary input data matrix pointer */
  849. float32_t *pOutT1, *pOutT2; /* Temporary output data matrix pointer */
  850. float32_t *pPivotRowIn, *pPRT_in, *pPivotRowDst, *pPRT_pDst; /* Temporary input and output data matrix pointer */
  851. uint32_t numRows = pSrc->numRows; /* Number of rows in the matrix */
  852. uint32_t numCols = pSrc->numCols; /* Number of Cols in the matrix */
  853. #if defined (ARM_MATH_DSP)
  854. float32_t Xchg, in = 0.0f, in1; /* Temporary input values */
  855. uint32_t i, rowCnt, flag = 0U, j, loopCnt, k,l; /* loop counters */
  856. arm_status status; /* status of matrix inverse */
  857. #ifdef ARM_MATH_MATRIX_CHECK
  858. /* Check for matrix mismatch condition */
  859. if ((pSrc->numRows != pSrc->numCols) ||
  860. (pDst->numRows != pDst->numCols) ||
  861. (pSrc->numRows != pDst->numRows) )
  862. {
  863. /* Set status as ARM_MATH_SIZE_MISMATCH */
  864. status = ARM_MATH_SIZE_MISMATCH;
  865. }
  866. else
  867. #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
  868. {
  869. /*--------------------------------------------------------------------------------------------------------------
  870. * Matrix Inverse can be solved using elementary row operations.
  871. *
  872. * Gauss-Jordan Method:
  873. *
  874. * 1. First combine the identity matrix and the input matrix separated by a bar to form an
  875. * augmented matrix as follows:
  876. * _ _ _ _
  877. * | a11 a12 | 1 0 | | X11 X12 |
  878. * | | | = | |
  879. * |_ a21 a22 | 0 1 _| |_ X21 X21 _|
  880. *
  881. * 2. In our implementation, pDst Matrix is used as identity matrix.
  882. *
  883. * 3. Begin with the first row. Let i = 1.
  884. *
  885. * 4. Check to see if the pivot for row i is zero.
  886. * The pivot is the element of the main diagonal that is on the current row.
  887. * For instance, if working with row i, then the pivot element is aii.
  888. * If the pivot is zero, exchange that row with a row below it that does not
  889. * contain a zero in column i. If this is not possible, then an inverse
  890. * to that matrix does not exist.
  891. *
  892. * 5. Divide every element of row i by the pivot.
  893. *
  894. * 6. For every row below and row i, replace that row with the sum of that row and
  895. * a multiple of row i so that each new element in column i below row i is zero.
  896. *
  897. * 7. Move to the next row and column and repeat steps 2 through 5 until you have zeros
  898. * for every element below and above the main diagonal.
  899. *
  900. * 8. Now an identical matrix is formed to the left of the bar(input matrix, pSrc).
  901. * Therefore, the matrix to the right of the bar is our solution(pDst matrix, pDst).
  902. *----------------------------------------------------------------------------------------------------------------*/
  903. /* Working pointer for destination matrix */
  904. pOutT1 = pOut;
  905. /* Loop over the number of rows */
  906. rowCnt = numRows;
  907. /* Making the destination matrix as identity matrix */
  908. while (rowCnt > 0U)
  909. {
  910. /* Writing all zeroes in lower triangle of the destination matrix */
  911. j = numRows - rowCnt;
  912. while (j > 0U)
  913. {
  914. *pOutT1++ = 0.0f;
  915. j--;
  916. }
  917. /* Writing all ones in the diagonal of the destination matrix */
  918. *pOutT1++ = 1.0f;
  919. /* Writing all zeroes in upper triangle of the destination matrix */
  920. j = rowCnt - 1U;
  921. while (j > 0U)
  922. {
  923. *pOutT1++ = 0.0f;
  924. j--;
  925. }
  926. /* Decrement loop counter */
  927. rowCnt--;
  928. }
  929. /* Loop over the number of columns of the input matrix.
  930. All the elements in each column are processed by the row operations */
  931. loopCnt = numCols;
  932. /* Index modifier to navigate through the columns */
  933. l = 0U;
  934. while (loopCnt > 0U)
  935. {
  936. /* Check if the pivot element is zero..
  937. * If it is zero then interchange the row with non zero row below.
  938. * If there is no non zero element to replace in the rows below,
  939. * then the matrix is Singular. */
  940. /* Working pointer for the input matrix that points
  941. * to the pivot element of the particular row */
  942. pInT1 = pIn + (l * numCols);
  943. /* Working pointer for the destination matrix that points
  944. * to the pivot element of the particular row */
  945. pOutT1 = pOut + (l * numCols);
  946. /* Temporary variable to hold the pivot value */
  947. in = *pInT1;
  948. /* Check if the pivot element is zero */
  949. if (*pInT1 == 0.0f)
  950. {
  951. /* Loop over the number rows present below */
  952. for (i = 1U; i < numRows - l; i++)
  953. {
  954. /* Update the input and destination pointers */
  955. pInT2 = pInT1 + (numCols * i);
  956. pOutT2 = pOutT1 + (numCols * i);
  957. /* Check if there is a non zero pivot element to
  958. * replace in the rows below */
  959. if (*pInT2 != 0.0f)
  960. {
  961. /* Loop over number of columns
  962. * to the right of the pilot element */
  963. j = numCols - l;
  964. while (j > 0U)
  965. {
  966. /* Exchange the row elements of the input matrix */
  967. Xchg = *pInT2;
  968. *pInT2++ = *pInT1;
  969. *pInT1++ = Xchg;
  970. /* Decrement the loop counter */
  971. j--;
  972. }
  973. /* Loop over number of columns of the destination matrix */
  974. j = numCols;
  975. while (j > 0U)
  976. {
  977. /* Exchange the row elements of the destination matrix */
  978. Xchg = *pOutT2;
  979. *pOutT2++ = *pOutT1;
  980. *pOutT1++ = Xchg;
  981. /* Decrement loop counter */
  982. j--;
  983. }
  984. /* Flag to indicate whether exchange is done or not */
  985. flag = 1U;
  986. /* Break after exchange is done */
  987. break;
  988. }
  989. /* Decrement loop counter */
  990. }
  991. }
  992. /* Update the status if the matrix is singular */
  993. if ((flag != 1U) && (in == 0.0f))
  994. {
  995. return ARM_MATH_SINGULAR;
  996. }
  997. /* Points to the pivot row of input and destination matrices */
  998. pPivotRowIn = pIn + (l * numCols);
  999. pPivotRowDst = pOut + (l * numCols);
  1000. /* Temporary pointers to the pivot row pointers */
  1001. pInT1 = pPivotRowIn;
  1002. pInT2 = pPivotRowDst;
  1003. /* Pivot element of the row */
  1004. in = *pPivotRowIn;
  1005. /* Loop over number of columns
  1006. * to the right of the pilot element */
  1007. j = (numCols - l);
  1008. while (j > 0U)
  1009. {
  1010. /* Divide each element of the row of the input matrix
  1011. * by the pivot element */
  1012. in1 = *pInT1;
  1013. *pInT1++ = in1 / in;
  1014. /* Decrement the loop counter */
  1015. j--;
  1016. }
  1017. /* Loop over number of columns of the destination matrix */
  1018. j = numCols;
  1019. while (j > 0U)
  1020. {
  1021. /* Divide each element of the row of the destination matrix
  1022. * by the pivot element */
  1023. in1 = *pInT2;
  1024. *pInT2++ = in1 / in;
  1025. /* Decrement the loop counter */
  1026. j--;
  1027. }
  1028. /* Replace the rows with the sum of that row and a multiple of row i
  1029. * so that each new element in column i above row i is zero.*/
  1030. /* Temporary pointers for input and destination matrices */
  1031. pInT1 = pIn;
  1032. pInT2 = pOut;
  1033. /* index used to check for pivot element */
  1034. i = 0U;
  1035. /* Loop over number of rows */
  1036. /* to be replaced by the sum of that row and a multiple of row i */
  1037. k = numRows;
  1038. while (k > 0U)
  1039. {
  1040. /* Check for the pivot element */
  1041. if (i == l)
  1042. {
  1043. /* If the processing element is the pivot element,
  1044. only the columns to the right are to be processed */
  1045. pInT1 += numCols - l;
  1046. pInT2 += numCols;
  1047. }
  1048. else
  1049. {
  1050. /* Element of the reference row */
  1051. in = *pInT1;
  1052. /* Working pointers for input and destination pivot rows */
  1053. pPRT_in = pPivotRowIn;
  1054. pPRT_pDst = pPivotRowDst;
  1055. /* Loop over the number of columns to the right of the pivot element,
  1056. to replace the elements in the input matrix */
  1057. j = (numCols - l);
  1058. while (j > 0U)
  1059. {
  1060. /* Replace the element by the sum of that row
  1061. and a multiple of the reference row */
  1062. in1 = *pInT1;
  1063. *pInT1++ = in1 - (in * *pPRT_in++);
  1064. /* Decrement the loop counter */
  1065. j--;
  1066. }
  1067. /* Loop over the number of columns to
  1068. replace the elements in the destination matrix */
  1069. j = numCols;
  1070. while (j > 0U)
  1071. {
  1072. /* Replace the element by the sum of that row
  1073. and a multiple of the reference row */
  1074. in1 = *pInT2;
  1075. *pInT2++ = in1 - (in * *pPRT_pDst++);
  1076. /* Decrement loop counter */
  1077. j--;
  1078. }
  1079. }
  1080. /* Increment temporary input pointer */
  1081. pInT1 = pInT1 + l;
  1082. /* Decrement loop counter */
  1083. k--;
  1084. /* Increment pivot index */
  1085. i++;
  1086. }
  1087. /* Increment the input pointer */
  1088. pIn++;
  1089. /* Decrement the loop counter */
  1090. loopCnt--;
  1091. /* Increment the index modifier */
  1092. l++;
  1093. }
  1094. #else
  1095. float32_t Xchg, in = 0.0f; /* Temporary input values */
  1096. uint32_t i, rowCnt, flag = 0U, j, loopCnt, l; /* loop counters */
  1097. arm_status status; /* status of matrix inverse */
  1098. #ifdef ARM_MATH_MATRIX_CHECK
  1099. /* Check for matrix mismatch condition */
  1100. if ((pSrc->numRows != pSrc->numCols) ||
  1101. (pDst->numRows != pDst->numCols) ||
  1102. (pSrc->numRows != pDst->numRows) )
  1103. {
  1104. /* Set status as ARM_MATH_SIZE_MISMATCH */
  1105. status = ARM_MATH_SIZE_MISMATCH;
  1106. }
  1107. else
  1108. #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
  1109. {
  1110. /*--------------------------------------------------------------------------------------------------------------
  1111. * Matrix Inverse can be solved using elementary row operations.
  1112. *
  1113. * Gauss-Jordan Method:
  1114. *
  1115. * 1. First combine the identity matrix and the input matrix separated by a bar to form an
  1116. * augmented matrix as follows:
  1117. * _ _ _ _ _ _ _ _
  1118. * | | a11 a12 | | | 1 0 | | | X11 X12 |
  1119. * | | | | | | | = | |
  1120. * |_ |_ a21 a22 _| | |_0 1 _| _| |_ X21 X21 _|
  1121. *
  1122. * 2. In our implementation, pDst Matrix is used as identity matrix.
  1123. *
  1124. * 3. Begin with the first row. Let i = 1.
  1125. *
  1126. * 4. Check to see if the pivot for row i is zero.
  1127. * The pivot is the element of the main diagonal that is on the current row.
  1128. * For instance, if working with row i, then the pivot element is aii.
  1129. * If the pivot is zero, exchange that row with a row below it that does not
  1130. * contain a zero in column i. If this is not possible, then an inverse
  1131. * to that matrix does not exist.
  1132. *
  1133. * 5. Divide every element of row i by the pivot.
  1134. *
  1135. * 6. For every row below and row i, replace that row with the sum of that row and
  1136. * a multiple of row i so that each new element in column i below row i is zero.
  1137. *
  1138. * 7. Move to the next row and column and repeat steps 2 through 5 until you have zeros
  1139. * for every element below and above the main diagonal.
  1140. *
  1141. * 8. Now an identical matrix is formed to the left of the bar(input matrix, src).
  1142. * Therefore, the matrix to the right of the bar is our solution(dst matrix, dst).
  1143. *----------------------------------------------------------------------------------------------------------------*/
  1144. /* Working pointer for destination matrix */
  1145. pOutT1 = pOut;
  1146. /* Loop over the number of rows */
  1147. rowCnt = numRows;
  1148. /* Making the destination matrix as identity matrix */
  1149. while (rowCnt > 0U)
  1150. {
  1151. /* Writing all zeroes in lower triangle of the destination matrix */
  1152. j = numRows - rowCnt;
  1153. while (j > 0U)
  1154. {
  1155. *pOutT1++ = 0.0f;
  1156. j--;
  1157. }
  1158. /* Writing all ones in the diagonal of the destination matrix */
  1159. *pOutT1++ = 1.0f;
  1160. /* Writing all zeroes in upper triangle of the destination matrix */
  1161. j = rowCnt - 1U;
  1162. while (j > 0U)
  1163. {
  1164. *pOutT1++ = 0.0f;
  1165. j--;
  1166. }
  1167. /* Decrement loop counter */
  1168. rowCnt--;
  1169. }
  1170. /* Loop over the number of columns of the input matrix.
  1171. All the elements in each column are processed by the row operations */
  1172. loopCnt = numCols;
  1173. /* Index modifier to navigate through the columns */
  1174. l = 0U;
  1175. while (loopCnt > 0U)
  1176. {
  1177. /* Check if the pivot element is zero..
  1178. * If it is zero then interchange the row with non zero row below.
  1179. * If there is no non zero element to replace in the rows below,
  1180. * then the matrix is Singular. */
  1181. /* Working pointer for the input matrix that points
  1182. * to the pivot element of the particular row */
  1183. pInT1 = pIn + (l * numCols);
  1184. /* Working pointer for the destination matrix that points
  1185. * to the pivot element of the particular row */
  1186. pOutT1 = pOut + (l * numCols);
  1187. /* Temporary variable to hold the pivot value */
  1188. in = *pInT1;
  1189. /* Check if the pivot element is zero */
  1190. if (*pInT1 == 0.0f)
  1191. {
  1192. /* Loop over the number rows present below */
  1193. for (i = 1U; i < numRows-l; i++)
  1194. {
  1195. /* Update the input and destination pointers */
  1196. pInT2 = pInT1 + (numCols * i);
  1197. pOutT2 = pOutT1 + (numCols * i);
  1198. /* Check if there is a non zero pivot element to
  1199. * replace in the rows below */
  1200. if (*pInT2 != 0.0f)
  1201. {
  1202. /* Loop over number of columns
  1203. * to the right of the pilot element */
  1204. for (j = 0U; j < (numCols - l); j++)
  1205. {
  1206. /* Exchange the row elements of the input matrix */
  1207. Xchg = *pInT2;
  1208. *pInT2++ = *pInT1;
  1209. *pInT1++ = Xchg;
  1210. }
  1211. for (j = 0U; j < numCols; j++)
  1212. {
  1213. Xchg = *pOutT2;
  1214. *pOutT2++ = *pOutT1;
  1215. *pOutT1++ = Xchg;
  1216. }
  1217. /* Flag to indicate whether exchange is done or not */
  1218. flag = 1U;
  1219. /* Break after exchange is done */
  1220. break;
  1221. }
  1222. }
  1223. }
  1224. /* Update the status if the matrix is singular */
  1225. if ((flag != 1U) && (in == 0.0f))
  1226. {
  1227. return ARM_MATH_SINGULAR;
  1228. }
  1229. /* Points to the pivot row of input and destination matrices */
  1230. pPivotRowIn = pIn + (l * numCols);
  1231. pPivotRowDst = pOut + (l * numCols);
  1232. /* Temporary pointers to the pivot row pointers */
  1233. pInT1 = pPivotRowIn;
  1234. pOutT1 = pPivotRowDst;
  1235. /* Pivot element of the row */
  1236. in = *(pIn + (l * numCols));
  1237. /* Loop over number of columns
  1238. * to the right of the pilot element */
  1239. for (j = 0U; j < (numCols - l); j++)
  1240. {
  1241. /* Divide each element of the row of the input matrix
  1242. * by the pivot element */
  1243. *pInT1 = *pInT1 / in;
  1244. pInT1++;
  1245. }
  1246. for (j = 0U; j < numCols; j++)
  1247. {
  1248. /* Divide each element of the row of the destination matrix
  1249. * by the pivot element */
  1250. *pOutT1 = *pOutT1 / in;
  1251. pOutT1++;
  1252. }
  1253. /* Replace the rows with the sum of that row and a multiple of row i
  1254. * so that each new element in column i above row i is zero.*/
  1255. /* Temporary pointers for input and destination matrices */
  1256. pInT1 = pIn;
  1257. pOutT1 = pOut;
  1258. for (i = 0U; i < numRows; i++)
  1259. {
  1260. /* Check for the pivot element */
  1261. if (i == l)
  1262. {
  1263. /* If the processing element is the pivot element,
  1264. only the columns to the right are to be processed */
  1265. pInT1 += numCols - l;
  1266. pOutT1 += numCols;
  1267. }
  1268. else
  1269. {
  1270. /* Element of the reference row */
  1271. in = *pInT1;
  1272. /* Working pointers for input and destination pivot rows */
  1273. pPRT_in = pPivotRowIn;
  1274. pPRT_pDst = pPivotRowDst;
  1275. /* Loop over the number of columns to the right of the pivot element,
  1276. to replace the elements in the input matrix */
  1277. for (j = 0U; j < (numCols - l); j++)
  1278. {
  1279. /* Replace the element by the sum of that row
  1280. and a multiple of the reference row */
  1281. *pInT1 = *pInT1 - (in * *pPRT_in++);
  1282. pInT1++;
  1283. }
  1284. /* Loop over the number of columns to
  1285. replace the elements in the destination matrix */
  1286. for (j = 0U; j < numCols; j++)
  1287. {
  1288. /* Replace the element by the sum of that row
  1289. and a multiple of the reference row */
  1290. *pOutT1 = *pOutT1 - (in * *pPRT_pDst++);
  1291. pOutT1++;
  1292. }
  1293. }
  1294. /* Increment temporary input pointer */
  1295. pInT1 = pInT1 + l;
  1296. }
  1297. /* Increment the input pointer */
  1298. pIn++;
  1299. /* Decrement the loop counter */
  1300. loopCnt--;
  1301. /* Increment the index modifier */
  1302. l++;
  1303. }
  1304. #endif /* #if defined (ARM_MATH_DSP) */
  1305. /* Set status as ARM_MATH_SUCCESS */
  1306. status = ARM_MATH_SUCCESS;
  1307. if ((flag != 1U) && (in == 0.0f))
  1308. {
  1309. pIn = pSrc->pData;
  1310. for (i = 0; i < numRows * numCols; i++)
  1311. {
  1312. if (pIn[i] != 0.0f)
  1313. break;
  1314. }
  1315. if (i == numRows * numCols)
  1316. status = ARM_MATH_SINGULAR;
  1317. }
  1318. }
  1319. /* Return to application */
  1320. return (status);
  1321. }
  1322. #endif /* #if defined(ARM_MATH_NEON) */
  1323. #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
  1324. /**
  1325. @} end of MatrixInv group
  1326. */