arm_mat_inverse_f32.c 50 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589
  1. /* ----------------------------------------------------------------------
  2. * Project: CMSIS DSP Library
  3. * Title: arm_mat_inverse_f32.c
  4. * Description: Floating-point matrix inverse
  5. *
  6. * $Date: 18. March 2019
  7. * $Revision: V1.6.0
  8. *
  9. * Target Processor: Cortex-M cores
  10. * -------------------------------------------------------------------- */
  11. /*
  12. * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
  13. *
  14. * SPDX-License-Identifier: Apache-2.0
  15. *
  16. * Licensed under the Apache License, Version 2.0 (the License); you may
  17. * not use this file except in compliance with the License.
  18. * You may obtain a copy of the License at
  19. *
  20. * www.apache.org/licenses/LICENSE-2.0
  21. *
  22. * Unless required by applicable law or agreed to in writing, software
  23. * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  24. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  25. * See the License for the specific language governing permissions and
  26. * limitations under the License.
  27. */
  28. #include "arm_math.h"
  29. /**
  30. @ingroup groupMatrix
  31. */
  32. /**
  33. @defgroup MatrixInv Matrix Inverse
  34. Computes the inverse of a matrix.
  35. The inverse is defined only if the input matrix is square and non-singular (the determinant is non-zero).
  36. The function checks that the input and output matrices are square and of the same size.
  37. Matrix inversion is numerically sensitive and the CMSIS DSP library only supports matrix
  38. inversion of floating-point matrices.
  39. @par Algorithm
  40. The Gauss-Jordan method is used to find the inverse.
  41. The algorithm performs a sequence of elementary row-operations until it
  42. reduces the input matrix to an identity matrix. Applying the same sequence
  43. of elementary row-operations to an identity matrix yields the inverse matrix.
  44. If the input matrix is singular, then the algorithm terminates and returns error status
  45. <code>ARM_MATH_SINGULAR</code>.
  46. \image html MatrixInverse.gif "Matrix Inverse of a 3 x 3 matrix using Gauss-Jordan Method"
  47. */
  48. /**
  49. @addtogroup MatrixInv
  50. @{
  51. */
  52. /**
  53. @brief Floating-point matrix inverse.
  54. @param[in] pSrc points to input matrix structure. The source matrix is modified by the function.
  55. @param[out] pDst points to output matrix structure
  56. @return execution status
  57. - \ref ARM_MATH_SUCCESS : Operation successful
  58. - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
  59. - \ref ARM_MATH_SINGULAR : Input matrix is found to be singular (non-invertible)
  60. */
  61. #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
  62. arm_status arm_mat_inverse_f32(
  63. const arm_matrix_instance_f32 * pSrc,
  64. arm_matrix_instance_f32 * pDst)
  65. {
  66. float32_t *pIn = pSrc->pData; /* input data matrix pointer */
  67. float32_t *pOut = pDst->pData; /* output data matrix pointer */
  68. float32_t *pInT1, *pInT2; /* Temporary input data matrix pointer */
  69. float32_t *pOutT1, *pOutT2; /* Temporary output data matrix pointer */
  70. float32_t *pPivotRowIn, *pPRT_in, *pPivotRowDst, *pPRT_pDst; /* Temporary input and output data matrix pointer */
  71. uint32_t numRows = pSrc->numRows; /* Number of rows in the matrix */
  72. uint32_t numCols = pSrc->numCols; /* Number of Cols in the matrix */
  73. float32_t *pTmpA, *pTmpB;
  74. float32_t in = 0.0f; /* Temporary input values */
  75. uint32_t i, rowCnt, flag = 0U, j, loopCnt, k, l; /* loop counters */
  76. arm_status status; /* status of matrix inverse */
  77. uint32_t blkCnt;
  78. #ifdef ARM_MATH_MATRIX_CHECK
  79. /* Check for matrix mismatch condition */
  80. if ((pSrc->numRows != pSrc->numCols) || (pDst->numRows != pDst->numCols)
  81. || (pSrc->numRows != pDst->numRows))
  82. {
  83. /* Set status as ARM_MATH_SIZE_MISMATCH */
  84. status = ARM_MATH_SIZE_MISMATCH;
  85. }
  86. else
  87. #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
  88. {
  89. /*--------------------------------------------------------------------------------------------------------------
  90. * Matrix Inverse can be solved using elementary row operations.
  91. *
  92. * Gauss-Jordan Method:
  93. *
  94. * 1. First combine the identity matrix and the input matrix separated by a bar to form an
  95. * augmented matrix as follows:
  96. * _ _ _ _ _ _ _ _
  97. * | | a11 a12 | | | 1 0 | | | X11 X12 |
  98. * | | | | | | | = | |
  99. * |_ |_ a21 a22 _| | |_0 1 _| _| |_ X21 X21 _|
  100. *
  101. * 2. In our implementation, pDst Matrix is used as identity matrix.
  102. *
  103. * 3. Begin with the first row. Let i = 1.
  104. *
  105. * 4. Check to see if the pivot for row i is zero.
  106. * The pivot is the element of the main diagonal that is on the current row.
  107. * For instance, if working with row i, then the pivot element is aii.
  108. * If the pivot is zero, exchange that row with a row below it that does not
  109. * contain a zero in column i. If this is not possible, then an inverse
  110. * to that matrix does not exist.
  111. *
  112. * 5. Divide every element of row i by the pivot.
  113. *
  114. * 6. For every row below and row i, replace that row with the sum of that row and
  115. * a multiple of row i so that each new element in column i below row i is zero.
  116. *
  117. * 7. Move to the next row and column and repeat steps 2 through 5 until you have zeros
  118. * for every element below and above the main diagonal.
  119. *
  120. * 8. Now an identical matrix is formed to the left of the bar(input matrix, src).
  121. * Therefore, the matrix to the right of the bar is our solution(dst matrix, dst).
  122. *----------------------------------------------------------------------------------------------------------------*/
  123. /*
  124. * Working pointer for destination matrix
  125. */
  126. pOutT1 = pOut;
  127. /*
  128. * Loop over the number of rows
  129. */
  130. rowCnt = numRows;
  131. /*
  132. * Making the destination matrix as identity matrix
  133. */
  134. while (rowCnt > 0U)
  135. {
  136. /*
  137. * Writing all zeroes in lower triangle of the destination matrix
  138. */
  139. j = numRows - rowCnt;
  140. while (j > 0U)
  141. {
  142. *pOutT1++ = 0.0f;
  143. j--;
  144. }
  145. /*
  146. * Writing all ones in the diagonal of the destination matrix
  147. */
  148. *pOutT1++ = 1.0f;
  149. /*
  150. * Writing all zeroes in upper triangle of the destination matrix
  151. */
  152. j = rowCnt - 1U;
  153. while (j > 0U)
  154. {
  155. *pOutT1++ = 0.0f;
  156. j--;
  157. }
  158. /*
  159. * Decrement the loop counter
  160. */
  161. rowCnt--;
  162. }
  163. /*
  164. * Loop over the number of columns of the input matrix.
  165. * All the elements in each column are processed by the row operations
  166. */
  167. loopCnt = numCols;
  168. /*
  169. * Index modifier to navigate through the columns
  170. */
  171. l = 0U;
  172. while (loopCnt > 0U)
  173. {
  174. /*
  175. * Check if the pivot element is zero..
  176. * If it is zero then interchange the row with non zero row below.
  177. * If there is no non zero element to replace in the rows below,
  178. * then the matrix is Singular.
  179. */
  180. /*
  181. * Working pointer for the input matrix that points
  182. * * to the pivot element of the particular row
  183. */
  184. pInT1 = pIn + (l * numCols);
  185. /*
  186. * Working pointer for the destination matrix that points
  187. * * to the pivot element of the particular row
  188. */
  189. pOutT1 = pOut + (l * numCols);
  190. /*
  191. * Temporary variable to hold the pivot value
  192. */
  193. in = *pInT1;
  194. /*
  195. * Destination pointer modifier
  196. */
  197. k = 1U;
  198. /*
  199. * Check if the pivot element is zero
  200. */
  201. if (*pInT1 == 0.0f)
  202. {
  203. /*
  204. * Loop over the number rows present below
  205. */
  206. for (i = (l + 1U); i < numRows; i++)
  207. {
  208. /*
  209. * Update the input and destination pointers
  210. */
  211. pInT2 = pInT1 + (numCols * i);
  212. pOutT2 = pOutT1 + (numCols * k);
  213. /*
  214. * Check if there is a non zero pivot element to
  215. * * replace in the rows below
  216. */
  217. if (*pInT2 != 0.0f)
  218. {
  219. f32x4_t vecA, vecB;
  220. /*
  221. * Loop over number of columns
  222. * * to the right of the pilot element
  223. */
  224. pTmpA = pInT1;
  225. pTmpB = pInT2;
  226. blkCnt = (numCols - l) >> 2;
  227. while (blkCnt > 0U)
  228. {
  229. vecA = vldrwq_f32(pTmpA);
  230. vecB = vldrwq_f32(pTmpB);
  231. vstrwq_f32(pTmpB, vecA);
  232. vstrwq_f32(pTmpA, vecB);
  233. pTmpA += 4;
  234. pTmpB += 4;
  235. /*
  236. * Decrement the blockSize loop counter
  237. */
  238. blkCnt--;
  239. }
  240. /*
  241. * tail
  242. * (will be merged thru tail predication)
  243. */
  244. blkCnt = (numCols - l) & 3;
  245. if (blkCnt > 0U)
  246. {
  247. mve_pred16_t p0 = vctp32q(blkCnt);
  248. vecA = vldrwq_f32(pTmpA);
  249. vecB = vldrwq_f32(pTmpB);
  250. vstrwq_p_f32(pTmpB, vecA, p0);
  251. vstrwq_p_f32(pTmpA, vecB, p0);
  252. }
  253. pInT1 += numCols - l;
  254. pInT2 += numCols - l;
  255. pTmpA = pOutT1;
  256. pTmpB = pOutT2;
  257. blkCnt = numCols >> 2;
  258. while (blkCnt > 0U)
  259. {
  260. vecA = vldrwq_f32(pTmpA);
  261. vecB = vldrwq_f32(pTmpB);
  262. vstrwq_f32(pTmpB, vecA);
  263. vstrwq_f32(pTmpA, vecB);
  264. pTmpA += 4;
  265. pTmpB += 4;
  266. /*
  267. * Decrement the blockSize loop counter
  268. */
  269. blkCnt--;
  270. }
  271. /*
  272. * tail
  273. */
  274. blkCnt = numCols & 3;
  275. if (blkCnt > 0U)
  276. {
  277. mve_pred16_t p0 = vctp32q(blkCnt);
  278. vecA = vldrwq_f32(pTmpA);
  279. vecB = vldrwq_f32(pTmpB);
  280. vstrwq_p_f32(pTmpB, vecA, p0);
  281. vstrwq_p_f32(pTmpA, vecB, p0);
  282. }
  283. pOutT1 += numCols;
  284. pOutT2 += numCols;
  285. /*
  286. * Flag to indicate whether exchange is done or not
  287. */
  288. flag = 1U;
  289. /*
  290. * Break after exchange is done
  291. */
  292. break;
  293. }
  294. /*
  295. * Update the destination pointer modifier
  296. */
  297. k++;
  298. }
  299. }
  300. /*
  301. * Update the status if the matrix is singular
  302. */
  303. if ((flag != 1U) && (in == 0.0f))
  304. {
  305. return ARM_MATH_SINGULAR;
  306. }
  307. /*
  308. * Points to the pivot row of input and destination matrices
  309. */
  310. pPivotRowIn = pIn + (l * numCols);
  311. pPivotRowDst = pOut + (l * numCols);
  312. /*
  313. * Temporary pointers to the pivot row pointers
  314. */
  315. pInT1 = pPivotRowIn;
  316. pOutT1 = pPivotRowDst;
  317. /*
  318. * Pivot element of the row
  319. */
  320. in = *(pIn + (l * numCols));
  321. pTmpA = pInT1;
  322. f32x4_t invIn = vdupq_n_f32(1.0f / in);
  323. blkCnt = (numCols - l) >> 2;
  324. f32x4_t vecA;
  325. while (blkCnt > 0U)
  326. {
  327. *(f32x4_t *) pTmpA = *(f32x4_t *) pTmpA * invIn;
  328. pTmpA += 4;
  329. /*
  330. * Decrement the blockSize loop counter
  331. */
  332. blkCnt--;
  333. }
  334. /*
  335. * tail
  336. */
  337. blkCnt = (numCols - l) & 3;
  338. if (blkCnt > 0U)
  339. {
  340. mve_pred16_t p0 = vctp32q(blkCnt);
  341. vecA = vldrwq_f32(pTmpA);
  342. vecA = vecA * invIn;
  343. vstrwq_p_f32(pTmpA, vecA, p0);
  344. }
  345. pInT1 += numCols - l;
  346. /*
  347. * Loop over number of columns
  348. * * to the right of the pilot element
  349. */
  350. pTmpA = pOutT1;
  351. blkCnt = numCols >> 2;
  352. while (blkCnt > 0U)
  353. {
  354. *(f32x4_t *) pTmpA = *(f32x4_t *) pTmpA *invIn;
  355. pTmpA += 4;
  356. /*
  357. * Decrement the blockSize loop counter
  358. */
  359. blkCnt--;
  360. }
  361. /*
  362. * tail
  363. * (will be merged thru tail predication)
  364. */
  365. blkCnt = numCols & 3;
  366. if (blkCnt > 0U)
  367. {
  368. mve_pred16_t p0 = vctp32q(blkCnt);
  369. vecA = vldrwq_f32(pTmpA);
  370. vecA = vecA * invIn;
  371. vstrwq_p_f32(pTmpA, vecA, p0);
  372. }
  373. pOutT1 += numCols;
  374. /*
  375. * Replace the rows with the sum of that row and a multiple of row i
  376. * * so that each new element in column i above row i is zero.
  377. */
  378. /*
  379. * Temporary pointers for input and destination matrices
  380. */
  381. pInT1 = pIn;
  382. pOutT1 = pOut;
  383. for (i = 0U; i < numRows; i++)
  384. {
  385. /*
  386. * Check for the pivot element
  387. */
  388. if (i == l)
  389. {
  390. /*
  391. * If the processing element is the pivot element,
  392. * only the columns to the right are to be processed
  393. */
  394. pInT1 += numCols - l;
  395. pOutT1 += numCols;
  396. }
  397. else
  398. {
  399. /*
  400. * Element of the reference row
  401. */
  402. /*
  403. * Working pointers for input and destination pivot rows
  404. */
  405. pPRT_in = pPivotRowIn;
  406. pPRT_pDst = pPivotRowDst;
  407. /*
  408. * Loop over the number of columns to the right of the pivot element,
  409. * to replace the elements in the input matrix
  410. */
  411. in = *pInT1;
  412. f32x4_t tmpV = vdupq_n_f32(in);
  413. blkCnt = (numCols - l) >> 2;
  414. while (blkCnt > 0U)
  415. {
  416. f32x4_t vec1, vec2;
  417. /*
  418. * Replace the element by the sum of that row
  419. * and a multiple of the reference row
  420. */
  421. vec1 = vldrwq_f32(pInT1);
  422. vec2 = vldrwq_f32(pPRT_in);
  423. vec1 = vfmsq_f32(vec1, tmpV, vec2);
  424. vstrwq_f32(pInT1, vec1);
  425. pPRT_in += 4;
  426. pInT1 += 4;
  427. /*
  428. * Decrement the blockSize loop counter
  429. */
  430. blkCnt--;
  431. }
  432. /*
  433. * tail
  434. * (will be merged thru tail predication)
  435. */
  436. blkCnt = (numCols - l) & 3;
  437. if (blkCnt > 0U)
  438. {
  439. f32x4_t vec1, vec2;
  440. mve_pred16_t p0 = vctp32q(blkCnt);
  441. vec1 = vldrwq_f32(pInT1);
  442. vec2 = vldrwq_f32(pPRT_in);
  443. vec1 = vfmsq_f32(vec1, tmpV, vec2);
  444. vstrwq_p_f32(pInT1, vec1, p0);
  445. pInT1 += blkCnt;
  446. }
  447. blkCnt = numCols >> 2;
  448. while (blkCnt > 0U)
  449. {
  450. f32x4_t vec1, vec2;
  451. /*
  452. * Replace the element by the sum of that row
  453. * and a multiple of the reference row
  454. */
  455. vec1 = vldrwq_f32(pOutT1);
  456. vec2 = vldrwq_f32(pPRT_pDst);
  457. vec1 = vfmsq_f32(vec1, tmpV, vec2);
  458. vstrwq_f32(pOutT1, vec1);
  459. pPRT_pDst += 4;
  460. pOutT1 += 4;
  461. /*
  462. * Decrement the blockSize loop counter
  463. */
  464. blkCnt--;
  465. }
  466. /*
  467. * tail
  468. * (will be merged thru tail predication)
  469. */
  470. blkCnt = numCols & 3;
  471. if (blkCnt > 0U)
  472. {
  473. f32x4_t vec1, vec2;
  474. mve_pred16_t p0 = vctp32q(blkCnt);
  475. vec1 = vldrwq_f32(pOutT1);
  476. vec2 = vldrwq_f32(pPRT_pDst);
  477. vec1 = vfmsq_f32(vec1, tmpV, vec2);
  478. vstrwq_p_f32(pOutT1, vec1, p0);
  479. pInT2 += blkCnt;
  480. pOutT1 += blkCnt;
  481. }
  482. }
  483. /*
  484. * Increment the temporary input pointer
  485. */
  486. pInT1 = pInT1 + l;
  487. }
  488. /*
  489. * Increment the input pointer
  490. */
  491. pIn++;
  492. /*
  493. * Decrement the loop counter
  494. */
  495. loopCnt--;
  496. /*
  497. * Increment the index modifier
  498. */
  499. l++;
  500. }
  501. /*
  502. * Set status as ARM_MATH_SUCCESS
  503. */
  504. status = ARM_MATH_SUCCESS;
  505. if ((flag != 1U) && (in == 0.0f))
  506. {
  507. pIn = pSrc->pData;
  508. for (i = 0; i < numRows * numCols; i++)
  509. {
  510. if (pIn[i] != 0.0f)
  511. break;
  512. }
  513. if (i == numRows * numCols)
  514. status = ARM_MATH_SINGULAR;
  515. }
  516. }
  517. /* Return to application */
  518. return (status);
  519. }
  520. #else
  521. #if defined(ARM_MATH_NEON)
  522. arm_status arm_mat_inverse_f32(
  523. const arm_matrix_instance_f32 * pSrc,
  524. arm_matrix_instance_f32 * pDst)
  525. {
  526. float32_t *pIn = pSrc->pData; /* input data matrix pointer */
  527. float32_t *pOut = pDst->pData; /* output data matrix pointer */
  528. float32_t *pInT1, *pInT2; /* Temporary input data matrix pointer */
  529. float32_t *pOutT1, *pOutT2; /* Temporary output data matrix pointer */
  530. float32_t *pPivotRowIn, *pPRT_in, *pPivotRowDst, *pPRT_pDst; /* Temporary input and output data matrix pointer */
  531. uint32_t numRows = pSrc->numRows; /* Number of rows in the matrix */
  532. uint32_t numCols = pSrc->numCols; /* Number of Cols in the matrix */
  533. float32_t Xchg, in = 0.0f, in1; /* Temporary input values */
  534. uint32_t i, rowCnt, flag = 0U, j, loopCnt, k, l; /* loop counters */
  535. arm_status status; /* status of matrix inverse */
  536. float32x4_t vec1;
  537. float32x4_t vec2;
  538. float32x4_t tmpV;
  539. #ifdef ARM_MATH_MATRIX_CHECK
  540. /* Check for matrix mismatch condition */
  541. if ((pSrc->numRows != pSrc->numCols) || (pDst->numRows != pDst->numCols)
  542. || (pSrc->numRows != pDst->numRows))
  543. {
  544. /* Set status as ARM_MATH_SIZE_MISMATCH */
  545. status = ARM_MATH_SIZE_MISMATCH;
  546. }
  547. else
  548. #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
  549. {
  550. /*--------------------------------------------------------------------------------------------------------------
  551. * Matrix Inverse can be solved using elementary row operations.
  552. *
  553. * Gauss-Jordan Method:
  554. *
  555. * 1. First combine the identity matrix and the input matrix separated by a bar to form an
  556. * augmented matrix as follows:
  557. * _ _ _ _
  558. * | a11 a12 | 1 0 | | X11 X12 |
  559. * | | | = | |
  560. * |_ a21 a22 | 0 1 _| |_ X21 X21 _|
  561. *
  562. * 2. In our implementation, pDst Matrix is used as identity matrix.
  563. *
  564. * 3. Begin with the first row. Let i = 1.
  565. *
  566. * 4. Check to see if the pivot for row i is zero.
  567. * The pivot is the element of the main diagonal that is on the current row.
  568. * For instance, if working with row i, then the pivot element is aii.
  569. * If the pivot is zero, exchange that row with a row below it that does not
  570. * contain a zero in column i. If this is not possible, then an inverse
  571. * to that matrix does not exist.
  572. *
  573. * 5. Divide every element of row i by the pivot.
  574. *
  575. * 6. For every row below and row i, replace that row with the sum of that row and
  576. * a multiple of row i so that each new element in column i below row i is zero.
  577. *
  578. * 7. Move to the next row and column and repeat steps 2 through 5 until you have zeros
  579. * for every element below and above the main diagonal.
  580. *
  581. * 8. Now an identical matrix is formed to the left of the bar(input matrix, pSrc).
  582. * Therefore, the matrix to the right of the bar is our solution(pDst matrix, pDst).
  583. *----------------------------------------------------------------------------------------------------------------*/
  584. /* Working pointer for destination matrix */
  585. pOutT1 = pOut;
  586. /* Loop over the number of rows */
  587. rowCnt = numRows;
  588. /* Making the destination matrix as identity matrix */
  589. while (rowCnt > 0U)
  590. {
  591. /* Writing all zeroes in lower triangle of the destination matrix */
  592. j = numRows - rowCnt;
  593. while (j > 0U)
  594. {
  595. *pOutT1++ = 0.0f;
  596. j--;
  597. }
  598. /* Writing all ones in the diagonal of the destination matrix */
  599. *pOutT1++ = 1.0f;
  600. /* Writing all zeroes in upper triangle of the destination matrix */
  601. j = rowCnt - 1U;
  602. while (j > 0U)
  603. {
  604. *pOutT1++ = 0.0f;
  605. j--;
  606. }
  607. /* Decrement the loop counter */
  608. rowCnt--;
  609. }
  610. /* Loop over the number of columns of the input matrix.
  611. All the elements in each column are processed by the row operations */
  612. loopCnt = numCols;
  613. /* Index modifier to navigate through the columns */
  614. l = 0U;
  615. while (loopCnt > 0U)
  616. {
  617. /* Check if the pivot element is zero..
  618. * If it is zero then interchange the row with non zero row below.
  619. * If there is no non zero element to replace in the rows below,
  620. * then the matrix is Singular. */
  621. /* Working pointer for the input matrix that points
  622. * to the pivot element of the particular row */
  623. pInT1 = pIn + (l * numCols);
  624. /* Working pointer for the destination matrix that points
  625. * to the pivot element of the particular row */
  626. pOutT1 = pOut + (l * numCols);
  627. /* Temporary variable to hold the pivot value */
  628. in = *pInT1;
  629. /* Destination pointer modifier */
  630. k = 1U;
  631. /* Check if the pivot element is zero */
  632. if (*pInT1 == 0.0f)
  633. {
  634. /* Loop over the number rows present below */
  635. for (i = (l + 1U); i < numRows; i++)
  636. {
  637. /* Update the input and destination pointers */
  638. pInT2 = pInT1 + (numCols * i);
  639. pOutT2 = pOutT1 + (numCols * k);
  640. /* Check if there is a non zero pivot element to
  641. * replace in the rows below */
  642. if (*pInT2 != 0.0f)
  643. {
  644. /* Loop over number of columns
  645. * to the right of the pilot element */
  646. j = numCols - l;
  647. while (j > 0U)
  648. {
  649. /* Exchange the row elements of the input matrix */
  650. Xchg = *pInT2;
  651. *pInT2++ = *pInT1;
  652. *pInT1++ = Xchg;
  653. /* Decrement the loop counter */
  654. j--;
  655. }
  656. /* Loop over number of columns of the destination matrix */
  657. j = numCols;
  658. while (j > 0U)
  659. {
  660. /* Exchange the row elements of the destination matrix */
  661. Xchg = *pOutT2;
  662. *pOutT2++ = *pOutT1;
  663. *pOutT1++ = Xchg;
  664. /* Decrement the loop counter */
  665. j--;
  666. }
  667. /* Flag to indicate whether exchange is done or not */
  668. flag = 1U;
  669. /* Break after exchange is done */
  670. break;
  671. }
  672. /* Update the destination pointer modifier */
  673. k++;
  674. }
  675. }
  676. /* Update the status if the matrix is singular */
  677. if ((flag != 1U) && (in == 0.0f))
  678. {
  679. return ARM_MATH_SINGULAR;
  680. }
  681. /* Points to the pivot row of input and destination matrices */
  682. pPivotRowIn = pIn + (l * numCols);
  683. pPivotRowDst = pOut + (l * numCols);
  684. /* Temporary pointers to the pivot row pointers */
  685. pInT1 = pPivotRowIn;
  686. pInT2 = pPivotRowDst;
  687. /* Pivot element of the row */
  688. in = *pPivotRowIn;
  689. tmpV = vdupq_n_f32(1.0f/in);
  690. /* Loop over number of columns
  691. * to the right of the pilot element */
  692. j = (numCols - l) >> 2;
  693. while (j > 0U)
  694. {
  695. /* Divide each element of the row of the input matrix
  696. * by the pivot element */
  697. vec1 = vld1q_f32(pInT1);
  698. vec1 = vmulq_f32(vec1, tmpV);
  699. vst1q_f32(pInT1, vec1);
  700. pInT1 += 4;
  701. /* Decrement the loop counter */
  702. j--;
  703. }
  704. /* Tail */
  705. j = (numCols - l) & 3;
  706. while (j > 0U)
  707. {
  708. /* Divide each element of the row of the input matrix
  709. * by the pivot element */
  710. in1 = *pInT1;
  711. *pInT1++ = in1 / in;
  712. /* Decrement the loop counter */
  713. j--;
  714. }
  715. /* Loop over number of columns of the destination matrix */
  716. j = numCols >> 2;
  717. while (j > 0U)
  718. {
  719. /* Divide each element of the row of the destination matrix
  720. * by the pivot element */
  721. vec1 = vld1q_f32(pInT2);
  722. vec1 = vmulq_f32(vec1, tmpV);
  723. vst1q_f32(pInT2, vec1);
  724. pInT2 += 4;
  725. /* Decrement the loop counter */
  726. j--;
  727. }
  728. /* Tail */
  729. j = numCols & 3;
  730. while (j > 0U)
  731. {
  732. /* Divide each element of the row of the destination matrix
  733. * by the pivot element */
  734. in1 = *pInT2;
  735. *pInT2++ = in1 / in;
  736. /* Decrement the loop counter */
  737. j--;
  738. }
  739. /* Replace the rows with the sum of that row and a multiple of row i
  740. * so that each new element in column i above row i is zero.*/
  741. /* Temporary pointers for input and destination matrices */
  742. pInT1 = pIn;
  743. pInT2 = pOut;
  744. /* index used to check for pivot element */
  745. i = 0U;
  746. /* Loop over number of rows */
  747. /* to be replaced by the sum of that row and a multiple of row i */
  748. k = numRows;
  749. while (k > 0U)
  750. {
  751. /* Check for the pivot element */
  752. if (i == l)
  753. {
  754. /* If the processing element is the pivot element,
  755. only the columns to the right are to be processed */
  756. pInT1 += numCols - l;
  757. pInT2 += numCols;
  758. }
  759. else
  760. {
  761. /* Element of the reference row */
  762. in = *pInT1;
  763. tmpV = vdupq_n_f32(in);
  764. /* Working pointers for input and destination pivot rows */
  765. pPRT_in = pPivotRowIn;
  766. pPRT_pDst = pPivotRowDst;
  767. /* Loop over the number of columns to the right of the pivot element,
  768. to replace the elements in the input matrix */
  769. j = (numCols - l) >> 2;
  770. while (j > 0U)
  771. {
  772. /* Replace the element by the sum of that row
  773. and a multiple of the reference row */
  774. vec1 = vld1q_f32(pInT1);
  775. vec2 = vld1q_f32(pPRT_in);
  776. vec1 = vmlsq_f32(vec1, tmpV, vec2);
  777. vst1q_f32(pInT1, vec1);
  778. pPRT_in += 4;
  779. pInT1 += 4;
  780. /* Decrement the loop counter */
  781. j--;
  782. }
  783. /* Tail */
  784. j = (numCols - l) & 3;
  785. while (j > 0U)
  786. {
  787. /* Replace the element by the sum of that row
  788. and a multiple of the reference row */
  789. in1 = *pInT1;
  790. *pInT1++ = in1 - (in * *pPRT_in++);
  791. /* Decrement the loop counter */
  792. j--;
  793. }
  794. /* Loop over the number of columns to
  795. replace the elements in the destination matrix */
  796. j = numCols >> 2;
  797. while (j > 0U)
  798. {
  799. /* Replace the element by the sum of that row
  800. and a multiple of the reference row */
  801. vec1 = vld1q_f32(pInT2);
  802. vec2 = vld1q_f32(pPRT_pDst);
  803. vec1 = vmlsq_f32(vec1, tmpV, vec2);
  804. vst1q_f32(pInT2, vec1);
  805. pPRT_pDst += 4;
  806. pInT2 += 4;
  807. /* Decrement the loop counter */
  808. j--;
  809. }
  810. /* Tail */
  811. j = numCols & 3;
  812. while (j > 0U)
  813. {
  814. /* Replace the element by the sum of that row
  815. and a multiple of the reference row */
  816. in1 = *pInT2;
  817. *pInT2++ = in1 - (in * *pPRT_pDst++);
  818. /* Decrement the loop counter */
  819. j--;
  820. }
  821. }
  822. /* Increment the temporary input pointer */
  823. pInT1 = pInT1 + l;
  824. /* Decrement the loop counter */
  825. k--;
  826. /* Increment the pivot index */
  827. i++;
  828. }
  829. /* Increment the input pointer */
  830. pIn++;
  831. /* Decrement the loop counter */
  832. loopCnt--;
  833. /* Increment the index modifier */
  834. l++;
  835. }
  836. /* Set status as ARM_MATH_SUCCESS */
  837. status = ARM_MATH_SUCCESS;
  838. if ((flag != 1U) && (in == 0.0f))
  839. {
  840. pIn = pSrc->pData;
  841. for (i = 0; i < numRows * numCols; i++)
  842. {
  843. if (pIn[i] != 0.0f)
  844. break;
  845. }
  846. if (i == numRows * numCols)
  847. status = ARM_MATH_SINGULAR;
  848. }
  849. }
  850. /* Return to application */
  851. return (status);
  852. }
  853. #else
  854. arm_status arm_mat_inverse_f32(
  855. const arm_matrix_instance_f32 * pSrc,
  856. arm_matrix_instance_f32 * pDst)
  857. {
  858. float32_t *pIn = pSrc->pData; /* input data matrix pointer */
  859. float32_t *pOut = pDst->pData; /* output data matrix pointer */
  860. float32_t *pInT1, *pInT2; /* Temporary input data matrix pointer */
  861. float32_t *pOutT1, *pOutT2; /* Temporary output data matrix pointer */
  862. float32_t *pPivotRowIn, *pPRT_in, *pPivotRowDst, *pPRT_pDst; /* Temporary input and output data matrix pointer */
  863. uint32_t numRows = pSrc->numRows; /* Number of rows in the matrix */
  864. uint32_t numCols = pSrc->numCols; /* Number of Cols in the matrix */
  865. #if defined (ARM_MATH_DSP)
  866. float32_t Xchg, in = 0.0f, in1; /* Temporary input values */
  867. uint32_t i, rowCnt, flag = 0U, j, loopCnt, k, l; /* loop counters */
  868. arm_status status; /* status of matrix inverse */
  869. #ifdef ARM_MATH_MATRIX_CHECK
  870. /* Check for matrix mismatch condition */
  871. if ((pSrc->numRows != pSrc->numCols) ||
  872. (pDst->numRows != pDst->numCols) ||
  873. (pSrc->numRows != pDst->numRows) )
  874. {
  875. /* Set status as ARM_MATH_SIZE_MISMATCH */
  876. status = ARM_MATH_SIZE_MISMATCH;
  877. }
  878. else
  879. #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
  880. {
  881. /*--------------------------------------------------------------------------------------------------------------
  882. * Matrix Inverse can be solved using elementary row operations.
  883. *
  884. * Gauss-Jordan Method:
  885. *
  886. * 1. First combine the identity matrix and the input matrix separated by a bar to form an
  887. * augmented matrix as follows:
  888. * _ _ _ _
  889. * | a11 a12 | 1 0 | | X11 X12 |
  890. * | | | = | |
  891. * |_ a21 a22 | 0 1 _| |_ X21 X21 _|
  892. *
  893. * 2. In our implementation, pDst Matrix is used as identity matrix.
  894. *
  895. * 3. Begin with the first row. Let i = 1.
  896. *
  897. * 4. Check to see if the pivot for row i is zero.
  898. * The pivot is the element of the main diagonal that is on the current row.
  899. * For instance, if working with row i, then the pivot element is aii.
  900. * If the pivot is zero, exchange that row with a row below it that does not
  901. * contain a zero in column i. If this is not possible, then an inverse
  902. * to that matrix does not exist.
  903. *
  904. * 5. Divide every element of row i by the pivot.
  905. *
  906. * 6. For every row below and row i, replace that row with the sum of that row and
  907. * a multiple of row i so that each new element in column i below row i is zero.
  908. *
  909. * 7. Move to the next row and column and repeat steps 2 through 5 until you have zeros
  910. * for every element below and above the main diagonal.
  911. *
  912. * 8. Now an identical matrix is formed to the left of the bar(input matrix, pSrc).
  913. * Therefore, the matrix to the right of the bar is our solution(pDst matrix, pDst).
  914. *----------------------------------------------------------------------------------------------------------------*/
  915. /* Working pointer for destination matrix */
  916. pOutT1 = pOut;
  917. /* Loop over the number of rows */
  918. rowCnt = numRows;
  919. /* Making the destination matrix as identity matrix */
  920. while (rowCnt > 0U)
  921. {
  922. /* Writing all zeroes in lower triangle of the destination matrix */
  923. j = numRows - rowCnt;
  924. while (j > 0U)
  925. {
  926. *pOutT1++ = 0.0f;
  927. j--;
  928. }
  929. /* Writing all ones in the diagonal of the destination matrix */
  930. *pOutT1++ = 1.0f;
  931. /* Writing all zeroes in upper triangle of the destination matrix */
  932. j = rowCnt - 1U;
  933. while (j > 0U)
  934. {
  935. *pOutT1++ = 0.0f;
  936. j--;
  937. }
  938. /* Decrement loop counter */
  939. rowCnt--;
  940. }
  941. /* Loop over the number of columns of the input matrix.
  942. All the elements in each column are processed by the row operations */
  943. loopCnt = numCols;
  944. /* Index modifier to navigate through the columns */
  945. l = 0U;
  946. while (loopCnt > 0U)
  947. {
  948. /* Check if the pivot element is zero..
  949. * If it is zero then interchange the row with non zero row below.
  950. * If there is no non zero element to replace in the rows below,
  951. * then the matrix is Singular. */
  952. /* Working pointer for the input matrix that points
  953. * to the pivot element of the particular row */
  954. pInT1 = pIn + (l * numCols);
  955. /* Working pointer for the destination matrix that points
  956. * to the pivot element of the particular row */
  957. pOutT1 = pOut + (l * numCols);
  958. /* Temporary variable to hold the pivot value */
  959. in = *pInT1;
  960. /* Destination pointer modifier */
  961. k = 1U;
  962. /* Check if the pivot element is zero */
  963. if (*pInT1 == 0.0f)
  964. {
  965. /* Loop over the number rows present below */
  966. for (i = (l + 1U); i < numRows; i++)
  967. {
  968. /* Update the input and destination pointers */
  969. pInT2 = pInT1 + (numCols * i);
  970. pOutT2 = pOutT1 + (numCols * k);
  971. /* Check if there is a non zero pivot element to
  972. * replace in the rows below */
  973. if (*pInT2 != 0.0f)
  974. {
  975. /* Loop over number of columns
  976. * to the right of the pilot element */
  977. j = numCols - l;
  978. while (j > 0U)
  979. {
  980. /* Exchange the row elements of the input matrix */
  981. Xchg = *pInT2;
  982. *pInT2++ = *pInT1;
  983. *pInT1++ = Xchg;
  984. /* Decrement the loop counter */
  985. j--;
  986. }
  987. /* Loop over number of columns of the destination matrix */
  988. j = numCols;
  989. while (j > 0U)
  990. {
  991. /* Exchange the row elements of the destination matrix */
  992. Xchg = *pOutT2;
  993. *pOutT2++ = *pOutT1;
  994. *pOutT1++ = Xchg;
  995. /* Decrement loop counter */
  996. j--;
  997. }
  998. /* Flag to indicate whether exchange is done or not */
  999. flag = 1U;
  1000. /* Break after exchange is done */
  1001. break;
  1002. }
  1003. /* Update the destination pointer modifier */
  1004. k++;
  1005. /* Decrement loop counter */
  1006. }
  1007. }
  1008. /* Update the status if the matrix is singular */
  1009. if ((flag != 1U) && (in == 0.0f))
  1010. {
  1011. return ARM_MATH_SINGULAR;
  1012. }
  1013. /* Points to the pivot row of input and destination matrices */
  1014. pPivotRowIn = pIn + (l * numCols);
  1015. pPivotRowDst = pOut + (l * numCols);
  1016. /* Temporary pointers to the pivot row pointers */
  1017. pInT1 = pPivotRowIn;
  1018. pInT2 = pPivotRowDst;
  1019. /* Pivot element of the row */
  1020. in = *pPivotRowIn;
  1021. /* Loop over number of columns
  1022. * to the right of the pilot element */
  1023. j = (numCols - l);
  1024. while (j > 0U)
  1025. {
  1026. /* Divide each element of the row of the input matrix
  1027. * by the pivot element */
  1028. in1 = *pInT1;
  1029. *pInT1++ = in1 / in;
  1030. /* Decrement the loop counter */
  1031. j--;
  1032. }
  1033. /* Loop over number of columns of the destination matrix */
  1034. j = numCols;
  1035. while (j > 0U)
  1036. {
  1037. /* Divide each element of the row of the destination matrix
  1038. * by the pivot element */
  1039. in1 = *pInT2;
  1040. *pInT2++ = in1 / in;
  1041. /* Decrement the loop counter */
  1042. j--;
  1043. }
  1044. /* Replace the rows with the sum of that row and a multiple of row i
  1045. * so that each new element in column i above row i is zero.*/
  1046. /* Temporary pointers for input and destination matrices */
  1047. pInT1 = pIn;
  1048. pInT2 = pOut;
  1049. /* index used to check for pivot element */
  1050. i = 0U;
  1051. /* Loop over number of rows */
  1052. /* to be replaced by the sum of that row and a multiple of row i */
  1053. k = numRows;
  1054. while (k > 0U)
  1055. {
  1056. /* Check for the pivot element */
  1057. if (i == l)
  1058. {
  1059. /* If the processing element is the pivot element,
  1060. only the columns to the right are to be processed */
  1061. pInT1 += numCols - l;
  1062. pInT2 += numCols;
  1063. }
  1064. else
  1065. {
  1066. /* Element of the reference row */
  1067. in = *pInT1;
  1068. /* Working pointers for input and destination pivot rows */
  1069. pPRT_in = pPivotRowIn;
  1070. pPRT_pDst = pPivotRowDst;
  1071. /* Loop over the number of columns to the right of the pivot element,
  1072. to replace the elements in the input matrix */
  1073. j = (numCols - l);
  1074. while (j > 0U)
  1075. {
  1076. /* Replace the element by the sum of that row
  1077. and a multiple of the reference row */
  1078. in1 = *pInT1;
  1079. *pInT1++ = in1 - (in * *pPRT_in++);
  1080. /* Decrement the loop counter */
  1081. j--;
  1082. }
  1083. /* Loop over the number of columns to
  1084. replace the elements in the destination matrix */
  1085. j = numCols;
  1086. while (j > 0U)
  1087. {
  1088. /* Replace the element by the sum of that row
  1089. and a multiple of the reference row */
  1090. in1 = *pInT2;
  1091. *pInT2++ = in1 - (in * *pPRT_pDst++);
  1092. /* Decrement loop counter */
  1093. j--;
  1094. }
  1095. }
  1096. /* Increment temporary input pointer */
  1097. pInT1 = pInT1 + l;
  1098. /* Decrement loop counter */
  1099. k--;
  1100. /* Increment pivot index */
  1101. i++;
  1102. }
  1103. /* Increment the input pointer */
  1104. pIn++;
  1105. /* Decrement the loop counter */
  1106. loopCnt--;
  1107. /* Increment the index modifier */
  1108. l++;
  1109. }
  1110. #else
  1111. float32_t Xchg, in = 0.0f; /* Temporary input values */
  1112. uint32_t i, rowCnt, flag = 0U, j, loopCnt, k, l; /* loop counters */
  1113. arm_status status; /* status of matrix inverse */
  1114. #ifdef ARM_MATH_MATRIX_CHECK
  1115. /* Check for matrix mismatch condition */
  1116. if ((pSrc->numRows != pSrc->numCols) ||
  1117. (pDst->numRows != pDst->numCols) ||
  1118. (pSrc->numRows != pDst->numRows) )
  1119. {
  1120. /* Set status as ARM_MATH_SIZE_MISMATCH */
  1121. status = ARM_MATH_SIZE_MISMATCH;
  1122. }
  1123. else
  1124. #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
  1125. {
  1126. /*--------------------------------------------------------------------------------------------------------------
  1127. * Matrix Inverse can be solved using elementary row operations.
  1128. *
  1129. * Gauss-Jordan Method:
  1130. *
  1131. * 1. First combine the identity matrix and the input matrix separated by a bar to form an
  1132. * augmented matrix as follows:
  1133. * _ _ _ _ _ _ _ _
  1134. * | | a11 a12 | | | 1 0 | | | X11 X12 |
  1135. * | | | | | | | = | |
  1136. * |_ |_ a21 a22 _| | |_0 1 _| _| |_ X21 X21 _|
  1137. *
  1138. * 2. In our implementation, pDst Matrix is used as identity matrix.
  1139. *
  1140. * 3. Begin with the first row. Let i = 1.
  1141. *
  1142. * 4. Check to see if the pivot for row i is zero.
  1143. * The pivot is the element of the main diagonal that is on the current row.
  1144. * For instance, if working with row i, then the pivot element is aii.
  1145. * If the pivot is zero, exchange that row with a row below it that does not
  1146. * contain a zero in column i. If this is not possible, then an inverse
  1147. * to that matrix does not exist.
  1148. *
  1149. * 5. Divide every element of row i by the pivot.
  1150. *
  1151. * 6. For every row below and row i, replace that row with the sum of that row and
  1152. * a multiple of row i so that each new element in column i below row i is zero.
  1153. *
  1154. * 7. Move to the next row and column and repeat steps 2 through 5 until you have zeros
  1155. * for every element below and above the main diagonal.
  1156. *
  1157. * 8. Now an identical matrix is formed to the left of the bar(input matrix, src).
  1158. * Therefore, the matrix to the right of the bar is our solution(dst matrix, dst).
  1159. *----------------------------------------------------------------------------------------------------------------*/
  1160. /* Working pointer for destination matrix */
  1161. pOutT1 = pOut;
  1162. /* Loop over the number of rows */
  1163. rowCnt = numRows;
  1164. /* Making the destination matrix as identity matrix */
  1165. while (rowCnt > 0U)
  1166. {
  1167. /* Writing all zeroes in lower triangle of the destination matrix */
  1168. j = numRows - rowCnt;
  1169. while (j > 0U)
  1170. {
  1171. *pOutT1++ = 0.0f;
  1172. j--;
  1173. }
  1174. /* Writing all ones in the diagonal of the destination matrix */
  1175. *pOutT1++ = 1.0f;
  1176. /* Writing all zeroes in upper triangle of the destination matrix */
  1177. j = rowCnt - 1U;
  1178. while (j > 0U)
  1179. {
  1180. *pOutT1++ = 0.0f;
  1181. j--;
  1182. }
  1183. /* Decrement loop counter */
  1184. rowCnt--;
  1185. }
  1186. /* Loop over the number of columns of the input matrix.
  1187. All the elements in each column are processed by the row operations */
  1188. loopCnt = numCols;
  1189. /* Index modifier to navigate through the columns */
  1190. l = 0U;
  1191. while (loopCnt > 0U)
  1192. {
  1193. /* Check if the pivot element is zero..
  1194. * If it is zero then interchange the row with non zero row below.
  1195. * If there is no non zero element to replace in the rows below,
  1196. * then the matrix is Singular. */
  1197. /* Working pointer for the input matrix that points
  1198. * to the pivot element of the particular row */
  1199. pInT1 = pIn + (l * numCols);
  1200. /* Working pointer for the destination matrix that points
  1201. * to the pivot element of the particular row */
  1202. pOutT1 = pOut + (l * numCols);
  1203. /* Temporary variable to hold the pivot value */
  1204. in = *pInT1;
  1205. /* Destination pointer modifier */
  1206. k = 1U;
  1207. /* Check if the pivot element is zero */
  1208. if (*pInT1 == 0.0f)
  1209. {
  1210. /* Loop over the number rows present below */
  1211. for (i = (l + 1U); i < numRows; i++)
  1212. {
  1213. /* Update the input and destination pointers */
  1214. pInT2 = pInT1 + (numCols * i);
  1215. pOutT2 = pOutT1 + (numCols * k);
  1216. /* Check if there is a non zero pivot element to
  1217. * replace in the rows below */
  1218. if (*pInT2 != 0.0f)
  1219. {
  1220. /* Loop over number of columns
  1221. * to the right of the pilot element */
  1222. for (j = 0U; j < (numCols - l); j++)
  1223. {
  1224. /* Exchange the row elements of the input matrix */
  1225. Xchg = *pInT2;
  1226. *pInT2++ = *pInT1;
  1227. *pInT1++ = Xchg;
  1228. }
  1229. for (j = 0U; j < numCols; j++)
  1230. {
  1231. Xchg = *pOutT2;
  1232. *pOutT2++ = *pOutT1;
  1233. *pOutT1++ = Xchg;
  1234. }
  1235. /* Flag to indicate whether exchange is done or not */
  1236. flag = 1U;
  1237. /* Break after exchange is done */
  1238. break;
  1239. }
  1240. /* Update the destination pointer modifier */
  1241. k++;
  1242. }
  1243. }
  1244. /* Update the status if the matrix is singular */
  1245. if ((flag != 1U) && (in == 0.0f))
  1246. {
  1247. return ARM_MATH_SINGULAR;
  1248. }
  1249. /* Points to the pivot row of input and destination matrices */
  1250. pPivotRowIn = pIn + (l * numCols);
  1251. pPivotRowDst = pOut + (l * numCols);
  1252. /* Temporary pointers to the pivot row pointers */
  1253. pInT1 = pPivotRowIn;
  1254. pOutT1 = pPivotRowDst;
  1255. /* Pivot element of the row */
  1256. in = *(pIn + (l * numCols));
  1257. /* Loop over number of columns
  1258. * to the right of the pilot element */
  1259. for (j = 0U; j < (numCols - l); j++)
  1260. {
  1261. /* Divide each element of the row of the input matrix
  1262. * by the pivot element */
  1263. *pInT1 = *pInT1 / in;
  1264. pInT1++;
  1265. }
  1266. for (j = 0U; j < numCols; j++)
  1267. {
  1268. /* Divide each element of the row of the destination matrix
  1269. * by the pivot element */
  1270. *pOutT1 = *pOutT1 / in;
  1271. pOutT1++;
  1272. }
  1273. /* Replace the rows with the sum of that row and a multiple of row i
  1274. * so that each new element in column i above row i is zero.*/
  1275. /* Temporary pointers for input and destination matrices */
  1276. pInT1 = pIn;
  1277. pOutT1 = pOut;
  1278. for (i = 0U; i < numRows; i++)
  1279. {
  1280. /* Check for the pivot element */
  1281. if (i == l)
  1282. {
  1283. /* If the processing element is the pivot element,
  1284. only the columns to the right are to be processed */
  1285. pInT1 += numCols - l;
  1286. pOutT1 += numCols;
  1287. }
  1288. else
  1289. {
  1290. /* Element of the reference row */
  1291. in = *pInT1;
  1292. /* Working pointers for input and destination pivot rows */
  1293. pPRT_in = pPivotRowIn;
  1294. pPRT_pDst = pPivotRowDst;
  1295. /* Loop over the number of columns to the right of the pivot element,
  1296. to replace the elements in the input matrix */
  1297. for (j = 0U; j < (numCols - l); j++)
  1298. {
  1299. /* Replace the element by the sum of that row
  1300. and a multiple of the reference row */
  1301. *pInT1 = *pInT1 - (in * *pPRT_in++);
  1302. pInT1++;
  1303. }
  1304. /* Loop over the number of columns to
  1305. replace the elements in the destination matrix */
  1306. for (j = 0U; j < numCols; j++)
  1307. {
  1308. /* Replace the element by the sum of that row
  1309. and a multiple of the reference row */
  1310. *pOutT1 = *pOutT1 - (in * *pPRT_pDst++);
  1311. pOutT1++;
  1312. }
  1313. }
  1314. /* Increment temporary input pointer */
  1315. pInT1 = pInT1 + l;
  1316. }
  1317. /* Increment the input pointer */
  1318. pIn++;
  1319. /* Decrement the loop counter */
  1320. loopCnt--;
  1321. /* Increment the index modifier */
  1322. l++;
  1323. }
  1324. #endif /* #if defined (ARM_MATH_DSP) */
  1325. /* Set status as ARM_MATH_SUCCESS */
  1326. status = ARM_MATH_SUCCESS;
  1327. if ((flag != 1U) && (in == 0.0f))
  1328. {
  1329. pIn = pSrc->pData;
  1330. for (i = 0; i < numRows * numCols; i++)
  1331. {
  1332. if (pIn[i] != 0.0f)
  1333. break;
  1334. }
  1335. if (i == numRows * numCols)
  1336. status = ARM_MATH_SINGULAR;
  1337. }
  1338. }
  1339. /* Return to application */
  1340. return (status);
  1341. }
  1342. #endif /* #if defined(ARM_MATH_NEON) */
  1343. #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
  1344. /**
  1345. @} end of MatrixInv group
  1346. */