arm_mat_inverse_f16.c 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891
  1. /* ----------------------------------------------------------------------
  2. * Project: CMSIS DSP Library
  3. * Title: arm_mat_inverse_f16.c
  4. * Description: Floating-point matrix inverse
  5. *
  6. * $Date: 23 April 2021
  7. * $Revision: V1.9.0
  8. *
  9. * Target Processor: Cortex-M and Cortex-A cores
  10. * -------------------------------------------------------------------- */
  11. /*
  12. * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  13. *
  14. * SPDX-License-Identifier: Apache-2.0
  15. *
  16. * Licensed under the Apache License, Version 2.0 (the License); you may
  17. * not use this file except in compliance with the License.
  18. * You may obtain a copy of the License at
  19. *
  20. * www.apache.org/licenses/LICENSE-2.0
  21. *
  22. * Unless required by applicable law or agreed to in writing, software
  23. * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  24. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  25. * See the License for the specific language governing permissions and
  26. * limitations under the License.
  27. */
  28. #include "dsp/matrix_functions_f16.h"
  29. #if defined(ARM_FLOAT16_SUPPORTED)
  30. /**
  31. @ingroup groupMatrix
  32. */
  33. /**
  34. @addtogroup MatrixInv
  35. @{
  36. */
  37. /**
  38. @brief Floating-point matrix inverse.
  39. @param[in] pSrc points to input matrix structure. The source matrix is modified by the function.
  40. @param[out] pDst points to output matrix structure
  41. @return execution status
  42. - \ref ARM_MATH_SUCCESS : Operation successful
  43. - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
  44. - \ref ARM_MATH_SINGULAR : Input matrix is found to be singular (non-invertible)
  45. */
  46. #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
  47. arm_status arm_mat_inverse_f16(
  48. const arm_matrix_instance_f16 * pSrc,
  49. arm_matrix_instance_f16 * pDst)
  50. {
  51. float16_t *pIn = pSrc->pData; /* input data matrix pointer */
  52. float16_t *pOut = pDst->pData; /* output data matrix pointer */
  53. float16_t *pInT1, *pInT2; /* Temporary input data matrix pointer */
  54. float16_t *pOutT1, *pOutT2; /* Temporary output data matrix pointer */
  55. float16_t *pPivotRowIn, *pPRT_in, *pPivotRowDst, *pPRT_pDst; /* Temporary input and output data matrix pointer */
  56. uint32_t numRows = pSrc->numRows; /* Number of rows in the matrix */
  57. uint32_t numCols = pSrc->numCols; /* Number of Cols in the matrix */
  58. float16_t *pTmpA, *pTmpB;
  59. _Float16 in = 0.0f16; /* Temporary input values */
  60. uint32_t i, rowCnt, flag = 0U, j, loopCnt, l; /* loop counters */
  61. arm_status status; /* status of matrix inverse */
  62. uint32_t blkCnt;
  63. #ifdef ARM_MATH_MATRIX_CHECK
  64. /* Check for matrix mismatch condition */
  65. if ((pSrc->numRows != pSrc->numCols) || (pDst->numRows != pDst->numCols)
  66. || (pSrc->numRows != pDst->numRows))
  67. {
  68. /* Set status as ARM_MATH_SIZE_MISMATCH */
  69. status = ARM_MATH_SIZE_MISMATCH;
  70. }
  71. else
  72. #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
  73. {
  74. /*--------------------------------------------------------------------------------------------------------------
  75. * Matrix Inverse can be solved using elementary row operations.
  76. *
  77. * Gauss-Jordan Method:
  78. *
  79. * 1. First combine the identity matrix and the input matrix separated by a bar to form an
  80. * augmented matrix as follows:
  81. * _ _ _ _ _ _ _ _
  82. * | | a11 a12 | | | 1 0 | | | X11 X12 |
  83. * | | | | | | | = | |
  84. * |_ |_ a21 a22 _| | |_0 1 _| _| |_ X21 X21 _|
  85. *
  86. * 2. In our implementation, pDst Matrix is used as identity matrix.
  87. *
  88. * 3. Begin with the first row. Let i = 1.
  89. *
  90. * 4. Check to see if the pivot for row i is zero.
  91. * The pivot is the element of the main diagonal that is on the current row.
  92. * For instance, if working with row i, then the pivot element is aii.
  93. * If the pivot is zero, exchange that row with a row below it that does not
  94. * contain a zero in column i. If this is not possible, then an inverse
  95. * to that matrix does not exist.
  96. *
  97. * 5. Divide every element of row i by the pivot.
  98. *
  99. * 6. For every row below and row i, replace that row with the sum of that row and
  100. * a multiple of row i so that each new element in column i below row i is zero.
  101. *
  102. * 7. Move to the next row and column and repeat steps 2 through 5 until you have zeros
  103. * for every element below and above the main diagonal.
  104. *
  105. * 8. Now an identical matrix is formed to the left of the bar(input matrix, src).
  106. * Therefore, the matrix to the right of the bar is our solution(dst matrix, dst).
  107. *----------------------------------------------------------------------------------------------------------------*/
  108. /*
  109. * Working pointer for destination matrix
  110. */
  111. pOutT1 = pOut;
  112. /*
  113. * Loop over the number of rows
  114. */
  115. rowCnt = numRows;
  116. /*
  117. * Making the destination matrix as identity matrix
  118. */
  119. while (rowCnt > 0U)
  120. {
  121. /*
  122. * Writing all zeroes in lower triangle of the destination matrix
  123. */
  124. j = numRows - rowCnt;
  125. while (j > 0U)
  126. {
  127. *pOutT1++ = 0.0f16;
  128. j--;
  129. }
  130. /*
  131. * Writing all ones in the diagonal of the destination matrix
  132. */
  133. *pOutT1++ = 1.0f16;
  134. /*
  135. * Writing all zeroes in upper triangle of the destination matrix
  136. */
  137. j = rowCnt - 1U;
  138. while (j > 0U)
  139. {
  140. *pOutT1++ = 0.0f16;
  141. j--;
  142. }
  143. /*
  144. * Decrement the loop counter
  145. */
  146. rowCnt--;
  147. }
  148. /*
  149. * Loop over the number of columns of the input matrix.
  150. * All the elements in each column are processed by the row operations
  151. */
  152. loopCnt = numCols;
  153. /*
  154. * Index modifier to navigate through the columns
  155. */
  156. l = 0U;
  157. while (loopCnt > 0U)
  158. {
  159. /*
  160. * Check if the pivot element is zero..
  161. * If it is zero then interchange the row with non zero row below.
  162. * If there is no non zero element to replace in the rows below,
  163. * then the matrix is Singular.
  164. */
  165. /*
  166. * Working pointer for the input matrix that points
  167. * * to the pivot element of the particular row
  168. */
  169. pInT1 = pIn + (l * numCols);
  170. /*
  171. * Working pointer for the destination matrix that points
  172. * * to the pivot element of the particular row
  173. */
  174. pOutT1 = pOut + (l * numCols);
  175. /*
  176. * Temporary variable to hold the pivot value
  177. */
  178. in = *pInT1;
  179. /*
  180. * Check if the pivot element is zero
  181. */
  182. if ((_Float16)*pInT1 == 0.0f16)
  183. {
  184. /*
  185. * Loop over the number rows present below
  186. */
  187. for (i = 1U; i < numRows-l; i++)
  188. {
  189. /*
  190. * Update the input and destination pointers
  191. */
  192. pInT2 = pInT1 + (numCols * i);
  193. pOutT2 = pOutT1 + (numCols * i);
  194. /*
  195. * Check if there is a non zero pivot element to
  196. * * replace in the rows below
  197. */
  198. if ((_Float16)*pInT2 != 0.0f16)
  199. {
  200. f16x8_t vecA, vecB;
  201. /*
  202. * Loop over number of columns
  203. * * to the right of the pilot element
  204. */
  205. pTmpA = pInT1;
  206. pTmpB = pInT2;
  207. blkCnt = (numCols - l) >> 3;
  208. while (blkCnt > 0U)
  209. {
  210. vecA = vldrhq_f16(pTmpA);
  211. vecB = vldrhq_f16(pTmpB);
  212. vstrhq_f16(pTmpB, vecA);
  213. vstrhq_f16(pTmpA, vecB);
  214. pTmpA += 8;
  215. pTmpB += 8;
  216. /*
  217. * Decrement the blockSize loop counter
  218. */
  219. blkCnt--;
  220. }
  221. /*
  222. * tail
  223. * (will be merged thru tail predication)
  224. */
  225. blkCnt = (numCols - l) & 7;
  226. if (blkCnt > 0U)
  227. {
  228. mve_pred16_t p0 = vctp16q(blkCnt);
  229. vecA = vldrhq_f16(pTmpA);
  230. vecB = vldrhq_f16(pTmpB);
  231. vstrhq_p_f16(pTmpB, vecA, p0);
  232. vstrhq_p_f16(pTmpA, vecB, p0);
  233. }
  234. pInT1 += numCols - l;
  235. pInT2 += numCols - l;
  236. pTmpA = pOutT1;
  237. pTmpB = pOutT2;
  238. blkCnt = numCols >> 3;
  239. while (blkCnt > 0U)
  240. {
  241. vecA = vldrhq_f16(pTmpA);
  242. vecB = vldrhq_f16(pTmpB);
  243. vstrhq_f16(pTmpB, vecA);
  244. vstrhq_f16(pTmpA, vecB);
  245. pTmpA += 8;
  246. pTmpB += 8;
  247. /*
  248. * Decrement the blockSize loop counter
  249. */
  250. blkCnt--;
  251. }
  252. /*
  253. * tail
  254. */
  255. blkCnt = numCols & 7;
  256. if (blkCnt > 0U)
  257. {
  258. mve_pred16_t p0 = vctp16q(blkCnt);
  259. vecA = vldrhq_f16(pTmpA);
  260. vecB = vldrhq_f16(pTmpB);
  261. vstrhq_p_f16(pTmpB, vecA, p0);
  262. vstrhq_p_f16(pTmpA, vecB, p0);
  263. }
  264. pOutT1 += numCols;
  265. pOutT2 += numCols;
  266. /*
  267. * Flag to indicate whether exchange is done or not
  268. */
  269. flag = 1U;
  270. /*
  271. * Break after exchange is done
  272. */
  273. break;
  274. }
  275. }
  276. }
  277. /*
  278. * Update the status if the matrix is singular
  279. */
  280. if ((flag != 1U) && (in == 0.0f16))
  281. {
  282. return ARM_MATH_SINGULAR;
  283. }
  284. /*
  285. * Points to the pivot row of input and destination matrices
  286. */
  287. pPivotRowIn = pIn + (l * numCols);
  288. pPivotRowDst = pOut + (l * numCols);
  289. /*
  290. * Temporary pointers to the pivot row pointers
  291. */
  292. pInT1 = pPivotRowIn;
  293. pOutT1 = pPivotRowDst;
  294. /*
  295. * Pivot element of the row
  296. */
  297. in = *(pIn + (l * numCols));
  298. pTmpA = pInT1;
  299. f16x8_t invIn = vdupq_n_f16(1.0f16 / in);
  300. blkCnt = (numCols - l) >> 3;
  301. f16x8_t vecA;
  302. while (blkCnt > 0U)
  303. {
  304. *(f16x8_t *) pTmpA = *(f16x8_t *) pTmpA * invIn;
  305. pTmpA += 8;
  306. /*
  307. * Decrement the blockSize loop counter
  308. */
  309. blkCnt--;
  310. }
  311. /*
  312. * tail
  313. */
  314. blkCnt = (numCols - l) & 7;
  315. if (blkCnt > 0U)
  316. {
  317. mve_pred16_t p0 = vctp16q(blkCnt);
  318. vecA = vldrhq_f16(pTmpA);
  319. vecA = vecA * invIn;
  320. vstrhq_p_f16(pTmpA, vecA, p0);
  321. }
  322. pInT1 += numCols - l;
  323. /*
  324. * Loop over number of columns
  325. * * to the right of the pilot element
  326. */
  327. pTmpA = pOutT1;
  328. blkCnt = numCols >> 3;
  329. while (blkCnt > 0U)
  330. {
  331. *(f16x8_t *) pTmpA = *(f16x8_t *) pTmpA *invIn;
  332. pTmpA += 8;
  333. /*
  334. * Decrement the blockSize loop counter
  335. */
  336. blkCnt--;
  337. }
  338. /*
  339. * tail
  340. * (will be merged thru tail predication)
  341. */
  342. blkCnt = numCols & 7;
  343. if (blkCnt > 0U)
  344. {
  345. mve_pred16_t p0 = vctp16q(blkCnt);
  346. vecA = vldrhq_f16(pTmpA);
  347. vecA = vecA * invIn;
  348. vstrhq_p_f16(pTmpA, vecA, p0);
  349. }
  350. pOutT1 += numCols;
  351. /*
  352. * Replace the rows with the sum of that row and a multiple of row i
  353. * * so that each new element in column i above row i is zero.
  354. */
  355. /*
  356. * Temporary pointers for input and destination matrices
  357. */
  358. pInT1 = pIn;
  359. pOutT1 = pOut;
  360. for (i = 0U; i < numRows; i++)
  361. {
  362. /*
  363. * Check for the pivot element
  364. */
  365. if (i == l)
  366. {
  367. /*
  368. * If the processing element is the pivot element,
  369. * only the columns to the right are to be processed
  370. */
  371. pInT1 += numCols - l;
  372. pOutT1 += numCols;
  373. }
  374. else
  375. {
  376. /*
  377. * Element of the reference row
  378. */
  379. /*
  380. * Working pointers for input and destination pivot rows
  381. */
  382. pPRT_in = pPivotRowIn;
  383. pPRT_pDst = pPivotRowDst;
  384. /*
  385. * Loop over the number of columns to the right of the pivot element,
  386. * to replace the elements in the input matrix
  387. */
  388. in = *pInT1;
  389. f16x8_t tmpV = vdupq_n_f16(in);
  390. blkCnt = (numCols - l) >> 3;
  391. while (blkCnt > 0U)
  392. {
  393. f16x8_t vec1, vec2;
  394. /*
  395. * Replace the element by the sum of that row
  396. * and a multiple of the reference row
  397. */
  398. vec1 = vldrhq_f16(pInT1);
  399. vec2 = vldrhq_f16(pPRT_in);
  400. vec1 = vfmsq_f16(vec1, tmpV, vec2);
  401. vstrhq_f16(pInT1, vec1);
  402. pPRT_in += 8;
  403. pInT1 += 8;
  404. /*
  405. * Decrement the blockSize loop counter
  406. */
  407. blkCnt--;
  408. }
  409. /*
  410. * tail
  411. * (will be merged thru tail predication)
  412. */
  413. blkCnt = (numCols - l) & 7;
  414. if (blkCnt > 0U)
  415. {
  416. f16x8_t vec1, vec2;
  417. mve_pred16_t p0 = vctp16q(blkCnt);
  418. vec1 = vldrhq_f16(pInT1);
  419. vec2 = vldrhq_f16(pPRT_in);
  420. vec1 = vfmsq_f16(vec1, tmpV, vec2);
  421. vstrhq_p_f16(pInT1, vec1, p0);
  422. pInT1 += blkCnt;
  423. }
  424. blkCnt = numCols >> 3;
  425. while (blkCnt > 0U)
  426. {
  427. f16x8_t vec1, vec2;
  428. /*
  429. * Replace the element by the sum of that row
  430. * and a multiple of the reference row
  431. */
  432. vec1 = vldrhq_f16(pOutT1);
  433. vec2 = vldrhq_f16(pPRT_pDst);
  434. vec1 = vfmsq_f16(vec1, tmpV, vec2);
  435. vstrhq_f16(pOutT1, vec1);
  436. pPRT_pDst += 8;
  437. pOutT1 += 8;
  438. /*
  439. * Decrement the blockSize loop counter
  440. */
  441. blkCnt--;
  442. }
  443. /*
  444. * tail
  445. * (will be merged thru tail predication)
  446. */
  447. blkCnt = numCols & 7;
  448. if (blkCnt > 0U)
  449. {
  450. f16x8_t vec1, vec2;
  451. mve_pred16_t p0 = vctp16q(blkCnt);
  452. vec1 = vldrhq_f16(pOutT1);
  453. vec2 = vldrhq_f16(pPRT_pDst);
  454. vec1 = vfmsq_f16(vec1, tmpV, vec2);
  455. vstrhq_p_f16(pOutT1, vec1, p0);
  456. pInT2 += blkCnt;
  457. pOutT1 += blkCnt;
  458. }
  459. }
  460. /*
  461. * Increment the temporary input pointer
  462. */
  463. pInT1 = pInT1 + l;
  464. }
  465. /*
  466. * Increment the input pointer
  467. */
  468. pIn++;
  469. /*
  470. * Decrement the loop counter
  471. */
  472. loopCnt--;
  473. /*
  474. * Increment the index modifier
  475. */
  476. l++;
  477. }
  478. /*
  479. * Set status as ARM_MATH_SUCCESS
  480. */
  481. status = ARM_MATH_SUCCESS;
  482. if ((flag != 1U) && (in == 0.0f16))
  483. {
  484. pIn = pSrc->pData;
  485. for (i = 0; i < numRows * numCols; i++)
  486. {
  487. if ((_Float16)pIn[i] != 0.0f16)
  488. break;
  489. }
  490. if (i == numRows * numCols)
  491. status = ARM_MATH_SINGULAR;
  492. }
  493. }
  494. /* Return to application */
  495. return (status);
  496. }
  497. #else
  498. arm_status arm_mat_inverse_f16(
  499. const arm_matrix_instance_f16 * pSrc,
  500. arm_matrix_instance_f16 * pDst)
  501. {
  502. float16_t *pIn = pSrc->pData; /* input data matrix pointer */
  503. float16_t *pOut = pDst->pData; /* output data matrix pointer */
  504. float16_t *pInT1, *pInT2; /* Temporary input data matrix pointer */
  505. float16_t *pOutT1, *pOutT2; /* Temporary output data matrix pointer */
  506. float16_t *pPivotRowIn, *pPRT_in, *pPivotRowDst, *pPRT_pDst; /* Temporary input and output data matrix pointer */
  507. uint32_t numRows = pSrc->numRows; /* Number of rows in the matrix */
  508. uint32_t numCols = pSrc->numCols; /* Number of Cols in the matrix */
  509. _Float16 Xchg, in = 0.0f16, in1; /* Temporary input values */
  510. uint32_t i, rowCnt, flag = 0U, j, loopCnt, k,l; /* loop counters */
  511. arm_status status; /* status of matrix inverse */
  512. #ifdef ARM_MATH_MATRIX_CHECK
  513. /* Check for matrix mismatch condition */
  514. if ((pSrc->numRows != pSrc->numCols) ||
  515. (pDst->numRows != pDst->numCols) ||
  516. (pSrc->numRows != pDst->numRows) )
  517. {
  518. /* Set status as ARM_MATH_SIZE_MISMATCH */
  519. status = ARM_MATH_SIZE_MISMATCH;
  520. }
  521. else
  522. #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
  523. {
  524. /*--------------------------------------------------------------------------------------------------------------
  525. * Matrix Inverse can be solved using elementary row operations.
  526. *
  527. * Gauss-Jordan Method:
  528. *
  529. * 1. First combine the identity matrix and the input matrix separated by a bar to form an
  530. * augmented matrix as follows:
  531. * _ _ _ _
  532. * | a11 a12 | 1 0 | | X11 X12 |
  533. * | | | = | |
  534. * |_ a21 a22 | 0 1 _| |_ X21 X21 _|
  535. *
  536. * 2. In our implementation, pDst Matrix is used as identity matrix.
  537. *
  538. * 3. Begin with the first row. Let i = 1.
  539. *
  540. * 4. Check to see if the pivot for row i is zero.
  541. * The pivot is the element of the main diagonal that is on the current row.
  542. * For instance, if working with row i, then the pivot element is aii.
  543. * If the pivot is zero, exchange that row with a row below it that does not
  544. * contain a zero in column i. If this is not possible, then an inverse
  545. * to that matrix does not exist.
  546. *
  547. * 5. Divide every element of row i by the pivot.
  548. *
  549. * 6. For every row below and row i, replace that row with the sum of that row and
  550. * a multiple of row i so that each new element in column i below row i is zero.
  551. *
  552. * 7. Move to the next row and column and repeat steps 2 through 5 until you have zeros
  553. * for every element below and above the main diagonal.
  554. *
  555. * 8. Now an identical matrix is formed to the left of the bar(input matrix, pSrc).
  556. * Therefore, the matrix to the right of the bar is our solution(pDst matrix, pDst).
  557. *----------------------------------------------------------------------------------------------------------------*/
  558. /* Working pointer for destination matrix */
  559. pOutT1 = pOut;
  560. /* Loop over the number of rows */
  561. rowCnt = numRows;
  562. /* Making the destination matrix as identity matrix */
  563. while (rowCnt > 0U)
  564. {
  565. /* Writing all zeroes in lower triangle of the destination matrix */
  566. j = numRows - rowCnt;
  567. while (j > 0U)
  568. {
  569. *pOutT1++ = 0.0f16;
  570. j--;
  571. }
  572. /* Writing all ones in the diagonal of the destination matrix */
  573. *pOutT1++ = 1.0f16;
  574. /* Writing all zeroes in upper triangle of the destination matrix */
  575. j = rowCnt - 1U;
  576. while (j > 0U)
  577. {
  578. *pOutT1++ = 0.0f16;
  579. j--;
  580. }
  581. /* Decrement loop counter */
  582. rowCnt--;
  583. }
  584. /* Loop over the number of columns of the input matrix.
  585. All the elements in each column are processed by the row operations */
  586. loopCnt = numCols;
  587. /* Index modifier to navigate through the columns */
  588. l = 0U;
  589. while (loopCnt > 0U)
  590. {
  591. /* Check if the pivot element is zero..
  592. * If it is zero then interchange the row with non zero row below.
  593. * If there is no non zero element to replace in the rows below,
  594. * then the matrix is Singular. */
  595. /* Working pointer for the input matrix that points
  596. * to the pivot element of the particular row */
  597. pInT1 = pIn + (l * numCols);
  598. /* Working pointer for the destination matrix that points
  599. * to the pivot element of the particular row */
  600. pOutT1 = pOut + (l * numCols);
  601. /* Temporary variable to hold the pivot value */
  602. in = *pInT1;
  603. /* Check if the pivot element is zero */
  604. if ((_Float16)*pInT1 == 0.0f16)
  605. {
  606. /* Loop over the number rows present below */
  607. for (i = 1U; i < numRows-l; i++)
  608. {
  609. /* Update the input and destination pointers */
  610. pInT2 = pInT1 + (numCols * i);
  611. pOutT2 = pOutT1 + (numCols * i);
  612. /* Check if there is a non zero pivot element to
  613. * replace in the rows below */
  614. if ((_Float16)*pInT2 != 0.0f16)
  615. {
  616. /* Loop over number of columns
  617. * to the right of the pilot element */
  618. j = numCols - l;
  619. while (j > 0U)
  620. {
  621. /* Exchange the row elements of the input matrix */
  622. Xchg = *pInT2;
  623. *pInT2++ = *pInT1;
  624. *pInT1++ = Xchg;
  625. /* Decrement the loop counter */
  626. j--;
  627. }
  628. /* Loop over number of columns of the destination matrix */
  629. j = numCols;
  630. while (j > 0U)
  631. {
  632. /* Exchange the row elements of the destination matrix */
  633. Xchg = *pOutT2;
  634. *pOutT2++ = *pOutT1;
  635. *pOutT1++ = Xchg;
  636. /* Decrement loop counter */
  637. j--;
  638. }
  639. /* Flag to indicate whether exchange is done or not */
  640. flag = 1U;
  641. /* Break after exchange is done */
  642. break;
  643. }
  644. }
  645. }
  646. /* Update the status if the matrix is singular */
  647. if ((flag != 1U) && (in == 0.0f16))
  648. {
  649. return ARM_MATH_SINGULAR;
  650. }
  651. /* Points to the pivot row of input and destination matrices */
  652. pPivotRowIn = pIn + (l * numCols);
  653. pPivotRowDst = pOut + (l * numCols);
  654. /* Temporary pointers to the pivot row pointers */
  655. pInT1 = pPivotRowIn;
  656. pInT2 = pPivotRowDst;
  657. /* Pivot element of the row */
  658. in = *pPivotRowIn;
  659. /* Loop over number of columns
  660. * to the right of the pilot element */
  661. j = (numCols - l);
  662. while (j > 0U)
  663. {
  664. /* Divide each element of the row of the input matrix
  665. * by the pivot element */
  666. in1 = *pInT1;
  667. *pInT1++ = in1 / in;
  668. /* Decrement the loop counter */
  669. j--;
  670. }
  671. /* Loop over number of columns of the destination matrix */
  672. j = numCols;
  673. while (j > 0U)
  674. {
  675. /* Divide each element of the row of the destination matrix
  676. * by the pivot element */
  677. in1 = *pInT2;
  678. *pInT2++ = in1 / in;
  679. /* Decrement the loop counter */
  680. j--;
  681. }
  682. /* Replace the rows with the sum of that row and a multiple of row i
  683. * so that each new element in column i above row i is zero.*/
  684. /* Temporary pointers for input and destination matrices */
  685. pInT1 = pIn;
  686. pInT2 = pOut;
  687. /* index used to check for pivot element */
  688. i = 0U;
  689. /* Loop over number of rows */
  690. /* to be replaced by the sum of that row and a multiple of row i */
  691. k = numRows;
  692. while (k > 0U)
  693. {
  694. /* Check for the pivot element */
  695. if (i == l)
  696. {
  697. /* If the processing element is the pivot element,
  698. only the columns to the right are to be processed */
  699. pInT1 += numCols - l;
  700. pInT2 += numCols;
  701. }
  702. else
  703. {
  704. /* Element of the reference row */
  705. in = *pInT1;
  706. /* Working pointers for input and destination pivot rows */
  707. pPRT_in = pPivotRowIn;
  708. pPRT_pDst = pPivotRowDst;
  709. /* Loop over the number of columns to the right of the pivot element,
  710. to replace the elements in the input matrix */
  711. j = (numCols - l);
  712. while (j > 0U)
  713. {
  714. /* Replace the element by the sum of that row
  715. and a multiple of the reference row */
  716. in1 = *pInT1;
  717. *pInT1++ = (_Float16)in1 - ((_Float16)in * (_Float16)*pPRT_in++);
  718. /* Decrement the loop counter */
  719. j--;
  720. }
  721. /* Loop over the number of columns to
  722. replace the elements in the destination matrix */
  723. j = numCols;
  724. while (j > 0U)
  725. {
  726. /* Replace the element by the sum of that row
  727. and a multiple of the reference row */
  728. in1 = *pInT2;
  729. *pInT2++ = (_Float16)in1 - ((_Float16)in * (_Float16)*pPRT_pDst++);
  730. /* Decrement loop counter */
  731. j--;
  732. }
  733. }
  734. /* Increment temporary input pointer */
  735. pInT1 = pInT1 + l;
  736. /* Decrement loop counter */
  737. k--;
  738. /* Increment pivot index */
  739. i++;
  740. }
  741. /* Increment the input pointer */
  742. pIn++;
  743. /* Decrement the loop counter */
  744. loopCnt--;
  745. /* Increment the index modifier */
  746. l++;
  747. }
  748. /* Set status as ARM_MATH_SUCCESS */
  749. status = ARM_MATH_SUCCESS;
  750. if ((flag != 1U) && ((_Float16)in == 0.0f16))
  751. {
  752. pIn = pSrc->pData;
  753. for (i = 0; i < numRows * numCols; i++)
  754. {
  755. if ((_Float16)pIn[i] != 0.0f16)
  756. break;
  757. }
  758. if (i == numRows * numCols)
  759. status = ARM_MATH_SINGULAR;
  760. }
  761. }
  762. /* Return to application */
  763. return (status);
  764. }
  #endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
  766. /**
  767. @} end of MatrixInv group
  768. */
  769. #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */