|
|
@@ -27,6 +27,7 @@
|
|
|
*/
|
|
|
|
|
|
#include "dsp/matrix_functions_f16.h"
|
|
|
+#include "dsp/matrix_utils.h"
|
|
|
|
|
|
#if defined(ARM_FLOAT16_SUPPORTED)
|
|
|
|
|
|
@@ -50,520 +51,20 @@
|
|
|
- \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
|
|
|
- \ref ARM_MATH_SINGULAR : Input matrix is found to be singular (non-invertible)
|
|
|
*/
|
|
|
-#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
|
|
|
-
|
|
|
-arm_status arm_mat_inverse_f16(
|
|
|
- const arm_matrix_instance_f16 * pSrc,
|
|
|
- arm_matrix_instance_f16 * pDst)
|
|
|
-{
|
|
|
- float16_t *pIn = pSrc->pData; /* input data matrix pointer */
|
|
|
- float16_t *pOut = pDst->pData; /* output data matrix pointer */
|
|
|
- float16_t *pInT1, *pInT2; /* Temporary input data matrix pointer */
|
|
|
- float16_t *pOutT1, *pOutT2; /* Temporary output data matrix pointer */
|
|
|
- float16_t *pPivotRowIn, *pPRT_in, *pPivotRowDst, *pPRT_pDst; /* Temporary input and output data matrix pointer */
|
|
|
-
|
|
|
- uint32_t numRows = pSrc->numRows; /* Number of rows in the matrix */
|
|
|
- uint32_t numCols = pSrc->numCols; /* Number of Cols in the matrix */
|
|
|
- float16_t *pTmpA, *pTmpB;
|
|
|
-
|
|
|
- _Float16 in = 0.0f16; /* Temporary input values */
|
|
|
- uint32_t i, rowCnt, flag = 0U, j, loopCnt, l; /* loop counters */
|
|
|
- arm_status status; /* status of matrix inverse */
|
|
|
- uint32_t blkCnt;
|
|
|
-
|
|
|
-#ifdef ARM_MATH_MATRIX_CHECK
|
|
|
- /* Check for matrix mismatch condition */
|
|
|
- if ((pSrc->numRows != pSrc->numCols) || (pDst->numRows != pDst->numCols)
|
|
|
- || (pSrc->numRows != pDst->numRows))
|
|
|
- {
|
|
|
- /* Set status as ARM_MATH_SIZE_MISMATCH */
|
|
|
- status = ARM_MATH_SIZE_MISMATCH;
|
|
|
- }
|
|
|
- else
|
|
|
-#endif /* #ifdef ARM_MATH_MATRIX_CHECK */
|
|
|
- {
|
|
|
-
|
|
|
- /*--------------------------------------------------------------------------------------------------------------
|
|
|
- * Matrix Inverse can be solved using elementary row operations.
|
|
|
- *
|
|
|
- * Gauss-Jordan Method:
|
|
|
- *
|
|
|
- * 1. First combine the identity matrix and the input matrix separated by a bar to form an
|
|
|
- * augmented matrix as follows:
|
|
|
- * _ _ _ _ _ _ _ _
|
|
|
- * | | a11 a12 | | | 1 0 | | | X11 X12 |
|
|
|
- * | | | | | | | = | |
|
|
|
- * |_ |_ a21 a22 _| | |_0 1 _| _| |_ X21 X21 _|
|
|
|
- *
|
|
|
- * 2. In our implementation, pDst Matrix is used as identity matrix.
|
|
|
- *
|
|
|
- * 3. Begin with the first row. Let i = 1.
|
|
|
- *
|
|
|
- * 4. Check to see if the pivot for row i is zero.
|
|
|
- * The pivot is the element of the main diagonal that is on the current row.
|
|
|
- * For instance, if working with row i, then the pivot element is aii.
|
|
|
- * If the pivot is zero, exchange that row with a row below it that does not
|
|
|
- * contain a zero in column i. If this is not possible, then an inverse
|
|
|
- * to that matrix does not exist.
|
|
|
- *
|
|
|
- * 5. Divide every element of row i by the pivot.
|
|
|
- *
|
|
|
- * 6. For every row below and row i, replace that row with the sum of that row and
|
|
|
- * a multiple of row i so that each new element in column i below row i is zero.
|
|
|
- *
|
|
|
- * 7. Move to the next row and column and repeat steps 2 through 5 until you have zeros
|
|
|
- * for every element below and above the main diagonal.
|
|
|
- *
|
|
|
- * 8. Now an identical matrix is formed to the left of the bar(input matrix, src).
|
|
|
- * Therefore, the matrix to the right of the bar is our solution(dst matrix, dst).
|
|
|
- *----------------------------------------------------------------------------------------------------------------*/
|
|
|
-
|
|
|
- /*
|
|
|
- * Working pointer for destination matrix
|
|
|
- */
|
|
|
- pOutT1 = pOut;
|
|
|
- /*
|
|
|
- * Loop over the number of rows
|
|
|
- */
|
|
|
- rowCnt = numRows;
|
|
|
- /*
|
|
|
- * Making the destination matrix as identity matrix
|
|
|
- */
|
|
|
- while (rowCnt > 0U)
|
|
|
- {
|
|
|
- /*
|
|
|
- * Writing all zeroes in lower triangle of the destination matrix
|
|
|
- */
|
|
|
- j = numRows - rowCnt;
|
|
|
- while (j > 0U)
|
|
|
- {
|
|
|
- *pOutT1++ = 0.0f16;
|
|
|
- j--;
|
|
|
- }
|
|
|
- /*
|
|
|
- * Writing all ones in the diagonal of the destination matrix
|
|
|
- */
|
|
|
- *pOutT1++ = 1.0f16;
|
|
|
- /*
|
|
|
- * Writing all zeroes in upper triangle of the destination matrix
|
|
|
- */
|
|
|
- j = rowCnt - 1U;
|
|
|
- while (j > 0U)
|
|
|
- {
|
|
|
- *pOutT1++ = 0.0f16;
|
|
|
- j--;
|
|
|
- }
|
|
|
- /*
|
|
|
- * Decrement the loop counter
|
|
|
- */
|
|
|
- rowCnt--;
|
|
|
- }
|
|
|
-
|
|
|
- /*
|
|
|
- * Loop over the number of columns of the input matrix.
|
|
|
- * All the elements in each column are processed by the row operations
|
|
|
- */
|
|
|
- loopCnt = numCols;
|
|
|
- /*
|
|
|
- * Index modifier to navigate through the columns
|
|
|
- */
|
|
|
- l = 0U;
|
|
|
- while (loopCnt > 0U)
|
|
|
- {
|
|
|
- /*
|
|
|
- * Check if the pivot element is zero..
|
|
|
- * If it is zero then interchange the row with non zero row below.
|
|
|
- * If there is no non zero element to replace in the rows below,
|
|
|
- * then the matrix is Singular.
|
|
|
- */
|
|
|
-
|
|
|
- /*
|
|
|
- * Working pointer for the input matrix that points
|
|
|
- * * to the pivot element of the particular row
|
|
|
- */
|
|
|
- pInT1 = pIn + (l * numCols);
|
|
|
- /*
|
|
|
- * Working pointer for the destination matrix that points
|
|
|
- * * to the pivot element of the particular row
|
|
|
- */
|
|
|
- pOutT1 = pOut + (l * numCols);
|
|
|
- /*
|
|
|
- * Temporary variable to hold the pivot value
|
|
|
- */
|
|
|
- in = *pInT1;
|
|
|
-
|
|
|
-
|
|
|
- /*
|
|
|
- * Check if the pivot element is zero
|
|
|
- */
|
|
|
- if ((_Float16)*pInT1 == 0.0f16)
|
|
|
- {
|
|
|
- /*
|
|
|
- * Loop over the number rows present below
|
|
|
- */
|
|
|
- for (i = 1U; i < numRows-l; i++)
|
|
|
- {
|
|
|
- /*
|
|
|
- * Update the input and destination pointers
|
|
|
- */
|
|
|
- pInT2 = pInT1 + (numCols * i);
|
|
|
- pOutT2 = pOutT1 + (numCols * i);
|
|
|
- /*
|
|
|
- * Check if there is a non zero pivot element to
|
|
|
- * * replace in the rows below
|
|
|
- */
|
|
|
- if ((_Float16)*pInT2 != 0.0f16)
|
|
|
- {
|
|
|
- f16x8_t vecA, vecB;
|
|
|
- /*
|
|
|
- * Loop over number of columns
|
|
|
- * * to the right of the pilot element
|
|
|
- */
|
|
|
- pTmpA = pInT1;
|
|
|
- pTmpB = pInT2;
|
|
|
- blkCnt = (numCols - l) >> 3;
|
|
|
- while (blkCnt > 0U)
|
|
|
- {
|
|
|
-
|
|
|
- vecA = vldrhq_f16(pTmpA);
|
|
|
- vecB = vldrhq_f16(pTmpB);
|
|
|
- vstrhq_f16(pTmpB, vecA);
|
|
|
- vstrhq_f16(pTmpA, vecB);
|
|
|
-
|
|
|
- pTmpA += 8;
|
|
|
- pTmpB += 8;
|
|
|
- /*
|
|
|
- * Decrement the blockSize loop counter
|
|
|
- */
|
|
|
- blkCnt--;
|
|
|
- }
|
|
|
- /*
|
|
|
- * tail
|
|
|
- * (will be merged thru tail predication)
|
|
|
- */
|
|
|
- blkCnt = (numCols - l) & 7;
|
|
|
- if (blkCnt > 0U)
|
|
|
- {
|
|
|
- mve_pred16_t p0 = vctp16q(blkCnt);
|
|
|
-
|
|
|
- vecA = vldrhq_f16(pTmpA);
|
|
|
- vecB = vldrhq_f16(pTmpB);
|
|
|
- vstrhq_p_f16(pTmpB, vecA, p0);
|
|
|
- vstrhq_p_f16(pTmpA, vecB, p0);
|
|
|
- }
|
|
|
-
|
|
|
- pInT1 += numCols - l;
|
|
|
- pInT2 += numCols - l;
|
|
|
- pTmpA = pOutT1;
|
|
|
- pTmpB = pOutT2;
|
|
|
- blkCnt = numCols >> 3;
|
|
|
- while (blkCnt > 0U)
|
|
|
- {
|
|
|
-
|
|
|
- vecA = vldrhq_f16(pTmpA);
|
|
|
- vecB = vldrhq_f16(pTmpB);
|
|
|
- vstrhq_f16(pTmpB, vecA);
|
|
|
- vstrhq_f16(pTmpA, vecB);
|
|
|
- pTmpA += 8;
|
|
|
- pTmpB += 8;
|
|
|
- /*
|
|
|
- * Decrement the blockSize loop counter
|
|
|
- */
|
|
|
- blkCnt--;
|
|
|
- }
|
|
|
- /*
|
|
|
- * tail
|
|
|
- */
|
|
|
- blkCnt = numCols & 7;
|
|
|
- if (blkCnt > 0U)
|
|
|
- {
|
|
|
- mve_pred16_t p0 = vctp16q(blkCnt);
|
|
|
-
|
|
|
- vecA = vldrhq_f16(pTmpA);
|
|
|
- vecB = vldrhq_f16(pTmpB);
|
|
|
- vstrhq_p_f16(pTmpB, vecA, p0);
|
|
|
- vstrhq_p_f16(pTmpA, vecB, p0);
|
|
|
- }
|
|
|
-
|
|
|
- pOutT1 += numCols;
|
|
|
- pOutT2 += numCols;
|
|
|
- /*
|
|
|
- * Flag to indicate whether exchange is done or not
|
|
|
- */
|
|
|
- flag = 1U;
|
|
|
-
|
|
|
- /*
|
|
|
- * Break after exchange is done
|
|
|
- */
|
|
|
- break;
|
|
|
- }
|
|
|
-
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- /*
|
|
|
- * Update the status if the matrix is singular
|
|
|
- */
|
|
|
- if ((flag != 1U) && (in == 0.0f16))
|
|
|
- {
|
|
|
- return ARM_MATH_SINGULAR;
|
|
|
- }
|
|
|
-
|
|
|
- /*
|
|
|
- * Points to the pivot row of input and destination matrices
|
|
|
- */
|
|
|
- pPivotRowIn = pIn + (l * numCols);
|
|
|
- pPivotRowDst = pOut + (l * numCols);
|
|
|
-
|
|
|
- /*
|
|
|
- * Temporary pointers to the pivot row pointers
|
|
|
- */
|
|
|
- pInT1 = pPivotRowIn;
|
|
|
- pOutT1 = pPivotRowDst;
|
|
|
-
|
|
|
- /*
|
|
|
- * Pivot element of the row
|
|
|
- */
|
|
|
- in = *(pIn + (l * numCols));
|
|
|
-
|
|
|
- pTmpA = pInT1;
|
|
|
-
|
|
|
- f16x8_t invIn = vdupq_n_f16(1.0f16 / in);
|
|
|
-
|
|
|
- blkCnt = (numCols - l) >> 3;
|
|
|
- f16x8_t vecA;
|
|
|
- while (blkCnt > 0U)
|
|
|
- {
|
|
|
- *(f16x8_t *) pTmpA = *(f16x8_t *) pTmpA * invIn;
|
|
|
- pTmpA += 8;
|
|
|
- /*
|
|
|
- * Decrement the blockSize loop counter
|
|
|
- */
|
|
|
- blkCnt--;
|
|
|
- }
|
|
|
- /*
|
|
|
- * tail
|
|
|
- */
|
|
|
- blkCnt = (numCols - l) & 7;
|
|
|
- if (blkCnt > 0U)
|
|
|
- {
|
|
|
- mve_pred16_t p0 = vctp16q(blkCnt);
|
|
|
-
|
|
|
-
|
|
|
- vecA = vldrhq_f16(pTmpA);
|
|
|
- vecA = vecA * invIn;
|
|
|
- vstrhq_p_f16(pTmpA, vecA, p0);
|
|
|
- }
|
|
|
-
|
|
|
- pInT1 += numCols - l;
|
|
|
- /*
|
|
|
- * Loop over number of columns
|
|
|
- * * to the right of the pilot element
|
|
|
- */
|
|
|
-
|
|
|
- pTmpA = pOutT1;
|
|
|
- blkCnt = numCols >> 3;
|
|
|
- while (blkCnt > 0U)
|
|
|
- {
|
|
|
- *(f16x8_t *) pTmpA = *(f16x8_t *) pTmpA *invIn;
|
|
|
- pTmpA += 8;
|
|
|
- /*
|
|
|
- * Decrement the blockSize loop counter
|
|
|
- */
|
|
|
- blkCnt--;
|
|
|
- }
|
|
|
- /*
|
|
|
- * tail
|
|
|
- * (will be merged thru tail predication)
|
|
|
- */
|
|
|
- blkCnt = numCols & 7;
|
|
|
- if (blkCnt > 0U)
|
|
|
- {
|
|
|
- mve_pred16_t p0 = vctp16q(blkCnt);
|
|
|
-
|
|
|
- vecA = vldrhq_f16(pTmpA);
|
|
|
- vecA = vecA * invIn;
|
|
|
- vstrhq_p_f16(pTmpA, vecA, p0);
|
|
|
- }
|
|
|
-
|
|
|
- pOutT1 += numCols;
|
|
|
-
|
|
|
- /*
|
|
|
- * Replace the rows with the sum of that row and a multiple of row i
|
|
|
- * * so that each new element in column i above row i is zero.
|
|
|
- */
|
|
|
-
|
|
|
- /*
|
|
|
- * Temporary pointers for input and destination matrices
|
|
|
- */
|
|
|
- pInT1 = pIn;
|
|
|
- pOutT1 = pOut;
|
|
|
-
|
|
|
- for (i = 0U; i < numRows; i++)
|
|
|
- {
|
|
|
- /*
|
|
|
- * Check for the pivot element
|
|
|
- */
|
|
|
- if (i == l)
|
|
|
- {
|
|
|
- /*
|
|
|
- * If the processing element is the pivot element,
|
|
|
- * only the columns to the right are to be processed
|
|
|
- */
|
|
|
- pInT1 += numCols - l;
|
|
|
- pOutT1 += numCols;
|
|
|
- }
|
|
|
- else
|
|
|
- {
|
|
|
- /*
|
|
|
- * Element of the reference row
|
|
|
- */
|
|
|
-
|
|
|
- /*
|
|
|
- * Working pointers for input and destination pivot rows
|
|
|
- */
|
|
|
- pPRT_in = pPivotRowIn;
|
|
|
- pPRT_pDst = pPivotRowDst;
|
|
|
- /*
|
|
|
- * Loop over the number of columns to the right of the pivot element,
|
|
|
- * to replace the elements in the input matrix
|
|
|
- */
|
|
|
-
|
|
|
- in = *pInT1;
|
|
|
- f16x8_t tmpV = vdupq_n_f16(in);
|
|
|
-
|
|
|
- blkCnt = (numCols - l) >> 3;
|
|
|
- while (blkCnt > 0U)
|
|
|
- {
|
|
|
- f16x8_t vec1, vec2;
|
|
|
- /*
|
|
|
- * Replace the element by the sum of that row
|
|
|
- * and a multiple of the reference row
|
|
|
- */
|
|
|
- vec1 = vldrhq_f16(pInT1);
|
|
|
- vec2 = vldrhq_f16(pPRT_in);
|
|
|
- vec1 = vfmsq_f16(vec1, tmpV, vec2);
|
|
|
- vstrhq_f16(pInT1, vec1);
|
|
|
- pPRT_in += 8;
|
|
|
- pInT1 += 8;
|
|
|
- /*
|
|
|
- * Decrement the blockSize loop counter
|
|
|
- */
|
|
|
- blkCnt--;
|
|
|
- }
|
|
|
- /*
|
|
|
- * tail
|
|
|
- * (will be merged thru tail predication)
|
|
|
- */
|
|
|
- blkCnt = (numCols - l) & 7;
|
|
|
- if (blkCnt > 0U)
|
|
|
- {
|
|
|
- f16x8_t vec1, vec2;
|
|
|
- mve_pred16_t p0 = vctp16q(blkCnt);
|
|
|
-
|
|
|
- vec1 = vldrhq_f16(pInT1);
|
|
|
- vec2 = vldrhq_f16(pPRT_in);
|
|
|
- vec1 = vfmsq_f16(vec1, tmpV, vec2);
|
|
|
- vstrhq_p_f16(pInT1, vec1, p0);
|
|
|
- pInT1 += blkCnt;
|
|
|
- }
|
|
|
-
|
|
|
- blkCnt = numCols >> 3;
|
|
|
- while (blkCnt > 0U)
|
|
|
- {
|
|
|
- f16x8_t vec1, vec2;
|
|
|
-
|
|
|
- /*
|
|
|
- * Replace the element by the sum of that row
|
|
|
- * and a multiple of the reference row
|
|
|
- */
|
|
|
- vec1 = vldrhq_f16(pOutT1);
|
|
|
- vec2 = vldrhq_f16(pPRT_pDst);
|
|
|
- vec1 = vfmsq_f16(vec1, tmpV, vec2);
|
|
|
- vstrhq_f16(pOutT1, vec1);
|
|
|
- pPRT_pDst += 8;
|
|
|
- pOutT1 += 8;
|
|
|
- /*
|
|
|
- * Decrement the blockSize loop counter
|
|
|
- */
|
|
|
- blkCnt--;
|
|
|
- }
|
|
|
- /*
|
|
|
- * tail
|
|
|
- * (will be merged thru tail predication)
|
|
|
- */
|
|
|
- blkCnt = numCols & 7;
|
|
|
- if (blkCnt > 0U)
|
|
|
- {
|
|
|
- f16x8_t vec1, vec2;
|
|
|
- mve_pred16_t p0 = vctp16q(blkCnt);
|
|
|
-
|
|
|
- vec1 = vldrhq_f16(pOutT1);
|
|
|
- vec2 = vldrhq_f16(pPRT_pDst);
|
|
|
- vec1 = vfmsq_f16(vec1, tmpV, vec2);
|
|
|
- vstrhq_p_f16(pOutT1, vec1, p0);
|
|
|
-
|
|
|
- pInT2 += blkCnt;
|
|
|
- pOutT1 += blkCnt;
|
|
|
- }
|
|
|
- }
|
|
|
- /*
|
|
|
- * Increment the temporary input pointer
|
|
|
- */
|
|
|
- pInT1 = pInT1 + l;
|
|
|
- }
|
|
|
- /*
|
|
|
- * Increment the input pointer
|
|
|
- */
|
|
|
- pIn++;
|
|
|
- /*
|
|
|
- * Decrement the loop counter
|
|
|
- */
|
|
|
- loopCnt--;
|
|
|
- /*
|
|
|
- * Increment the index modifier
|
|
|
- */
|
|
|
- l++;
|
|
|
- }
|
|
|
-
|
|
|
- /*
|
|
|
- * Set status as ARM_MATH_SUCCESS
|
|
|
- */
|
|
|
- status = ARM_MATH_SUCCESS;
|
|
|
-
|
|
|
- if ((flag != 1U) && (in == 0.0f16))
|
|
|
- {
|
|
|
- pIn = pSrc->pData;
|
|
|
- for (i = 0; i < numRows * numCols; i++)
|
|
|
- {
|
|
|
- if ((_Float16)pIn[i] != 0.0f16)
|
|
|
- break;
|
|
|
- }
|
|
|
-
|
|
|
- if (i == numRows * numCols)
|
|
|
- status = ARM_MATH_SINGULAR;
|
|
|
- }
|
|
|
- }
|
|
|
- /* Return to application */
|
|
|
- return (status);
|
|
|
-}
|
|
|
-
|
|
|
-#else
|
|
|
-
|
|
|
arm_status arm_mat_inverse_f16(
|
|
|
const arm_matrix_instance_f16 * pSrc,
|
|
|
arm_matrix_instance_f16 * pDst)
|
|
|
{
|
|
|
float16_t *pIn = pSrc->pData; /* input data matrix pointer */
|
|
|
float16_t *pOut = pDst->pData; /* output data matrix pointer */
|
|
|
- float16_t *pInT1, *pInT2; /* Temporary input data matrix pointer */
|
|
|
- float16_t *pOutT1, *pOutT2; /* Temporary output data matrix pointer */
|
|
|
- float16_t *pPivotRowIn, *pPRT_in, *pPivotRowDst, *pPRT_pDst; /* Temporary input and output data matrix pointer */
|
|
|
+
|
|
|
+ float16_t *pTmp;
|
|
|
uint32_t numRows = pSrc->numRows; /* Number of rows in the matrix */
|
|
|
uint32_t numCols = pSrc->numCols; /* Number of Cols in the matrix */
|
|
|
|
|
|
- _Float16 Xchg, in = 0.0f16, in1; /* Temporary input values */
|
|
|
- uint32_t i, rowCnt, flag = 0U, j, loopCnt, k,l; /* loop counters */
|
|
|
+
|
|
|
+ float16_t pivot = 0.0f16, newPivot=0.0f16; /* Temporary input values */
|
|
|
+ uint32_t selectedRow,pivotRow,i, rowNb, rowCnt, flag = 0U, j,column; /* loop counters */
|
|
|
arm_status status; /* status of matrix inverse */
|
|
|
|
|
|
#ifdef ARM_MATH_MATRIX_CHECK
|
|
|
@@ -581,7 +82,6 @@ arm_status arm_mat_inverse_f16(
|
|
|
#endif /* #ifdef ARM_MATH_MATRIX_CHECK */
|
|
|
|
|
|
{
|
|
|
-
|
|
|
/*--------------------------------------------------------------------------------------------------------------
|
|
|
* Matrix Inverse can be solved using elementary row operations.
|
|
|
*
|
|
|
@@ -618,7 +118,7 @@ arm_status arm_mat_inverse_f16(
|
|
|
*----------------------------------------------------------------------------------------------------------------*/
|
|
|
|
|
|
/* Working pointer for destination matrix */
|
|
|
- pOutT1 = pOut;
|
|
|
+ pTmp = pOut;
|
|
|
|
|
|
/* Loop over the number of rows */
|
|
|
rowCnt = numRows;
|
|
|
@@ -630,18 +130,18 @@ arm_status arm_mat_inverse_f16(
|
|
|
j = numRows - rowCnt;
|
|
|
while (j > 0U)
|
|
|
{
|
|
|
- *pOutT1++ = 0.0f16;
|
|
|
+ *pTmp++ = 0.0f16;
|
|
|
j--;
|
|
|
}
|
|
|
|
|
|
/* Writing all ones in the diagonal of the destination matrix */
|
|
|
- *pOutT1++ = 1.0f16;
|
|
|
+ *pTmp++ = 1.0f16;
|
|
|
|
|
|
/* Writing all zeroes in upper triangle of the destination matrix */
|
|
|
j = rowCnt - 1U;
|
|
|
while (j > 0U)
|
|
|
{
|
|
|
- *pOutT1++ = 0.0f16;
|
|
|
+ *pTmp++ = 0.0f16;
|
|
|
j--;
|
|
|
}
|
|
|
|
|
|
@@ -651,220 +151,100 @@ arm_status arm_mat_inverse_f16(
|
|
|
|
|
|
/* Loop over the number of columns of the input matrix.
|
|
|
All the elements in each column are processed by the row operations */
|
|
|
- loopCnt = numCols;
|
|
|
|
|
|
/* Index modifier to navigate through the columns */
|
|
|
- l = 0U;
|
|
|
-
|
|
|
- while (loopCnt > 0U)
|
|
|
+ for(column = 0U; column < numCols; column++)
|
|
|
{
|
|
|
/* Check if the pivot element is zero..
|
|
|
* If it is zero then interchange the row with non zero row below.
|
|
|
* If there is no non zero element to replace in the rows below,
|
|
|
* then the matrix is Singular. */
|
|
|
|
|
|
- /* Working pointer for the input matrix that points
|
|
|
- * to the pivot element of the particular row */
|
|
|
- pInT1 = pIn + (l * numCols);
|
|
|
-
|
|
|
- /* Working pointer for the destination matrix that points
|
|
|
- * to the pivot element of the particular row */
|
|
|
- pOutT1 = pOut + (l * numCols);
|
|
|
+ pivotRow = column;
|
|
|
|
|
|
/* Temporary variable to hold the pivot value */
|
|
|
- in = *pInT1;
|
|
|
-
|
|
|
+ pTmp = ELEM(pSrc,column,column) ;
|
|
|
+ pivot = *pTmp;
|
|
|
+ selectedRow = column;
|
|
|
|
|
|
- /* Check if the pivot element is zero */
|
|
|
- if ((_Float16)*pInT1 == 0.0f16)
|
|
|
- {
|
|
|
+
|
|
|
/* Loop over the number rows present below */
|
|
|
|
|
|
- for (i = 1U; i < numRows-l; i++)
|
|
|
- {
|
|
|
+ for (rowNb = column+1; rowNb < numRows; rowNb++)
|
|
|
+ {
|
|
|
/* Update the input and destination pointers */
|
|
|
- pInT2 = pInT1 + (numCols * i);
|
|
|
- pOutT2 = pOutT1 + (numCols * i);
|
|
|
+ pTmp = ELEM(pSrc,rowNb,column);
|
|
|
+ newPivot = *pTmp;
|
|
|
+ if (fabsf((float32_t)newPivot) > fabsf((float32_t)pivot))
|
|
|
+ {
|
|
|
+ selectedRow = rowNb;
|
|
|
+ pivot = newPivot;
|
|
|
+ }
|
|
|
+
|
|
|
+ }
|
|
|
|
|
|
/* Check if there is a non zero pivot element to
|
|
|
* replace in the rows below */
|
|
|
- if ((_Float16)*pInT2 != 0.0f16)
|
|
|
- {
|
|
|
+ if (((_Float16)pivot != 0.0f16) && (selectedRow != column))
|
|
|
+ {
|
|
|
/* Loop over number of columns
|
|
|
* to the right of the pilot element */
|
|
|
- j = numCols - l;
|
|
|
-
|
|
|
- while (j > 0U)
|
|
|
- {
|
|
|
- /* Exchange the row elements of the input matrix */
|
|
|
- Xchg = *pInT2;
|
|
|
- *pInT2++ = *pInT1;
|
|
|
- *pInT1++ = Xchg;
|
|
|
|
|
|
- /* Decrement the loop counter */
|
|
|
- j--;
|
|
|
- }
|
|
|
-
|
|
|
- /* Loop over number of columns of the destination matrix */
|
|
|
- j = numCols;
|
|
|
-
|
|
|
- while (j > 0U)
|
|
|
- {
|
|
|
- /* Exchange the row elements of the destination matrix */
|
|
|
- Xchg = *pOutT2;
|
|
|
- *pOutT2++ = *pOutT1;
|
|
|
- *pOutT1++ = Xchg;
|
|
|
-
|
|
|
- /* Decrement loop counter */
|
|
|
- j--;
|
|
|
- }
|
|
|
+ SWAP_ROWS_F16(pSrc,column, pivotRow,selectedRow);
|
|
|
+ SWAP_ROWS_F16(pDst,0, pivotRow,selectedRow);
|
|
|
|
|
|
+
|
|
|
/* Flag to indicate whether exchange is done or not */
|
|
|
flag = 1U;
|
|
|
|
|
|
- /* Break after exchange is done */
|
|
|
- break;
|
|
|
- }
|
|
|
-
|
|
|
- }
|
|
|
}
|
|
|
|
|
|
+
|
|
|
/* Update the status if the matrix is singular */
|
|
|
- if ((flag != 1U) && (in == 0.0f16))
|
|
|
+ if ((flag != 1U) && ((_Float16)pivot == 0.0f16))
|
|
|
{
|
|
|
return ARM_MATH_SINGULAR;
|
|
|
}
|
|
|
|
|
|
- /* Points to the pivot row of input and destination matrices */
|
|
|
- pPivotRowIn = pIn + (l * numCols);
|
|
|
- pPivotRowDst = pOut + (l * numCols);
|
|
|
-
|
|
|
- /* Temporary pointers to the pivot row pointers */
|
|
|
- pInT1 = pPivotRowIn;
|
|
|
- pInT2 = pPivotRowDst;
|
|
|
-
|
|
|
+
|
|
|
/* Pivot element of the row */
|
|
|
- in = *pPivotRowIn;
|
|
|
+ pivot = 1.0f16 / (_Float16)pivot;
|
|
|
|
|
|
- /* Loop over number of columns
|
|
|
- * to the right of the pilot element */
|
|
|
- j = (numCols - l);
|
|
|
-
|
|
|
- while (j > 0U)
|
|
|
- {
|
|
|
- /* Divide each element of the row of the input matrix
|
|
|
- * by the pivot element */
|
|
|
- in1 = *pInT1;
|
|
|
- *pInT1++ = in1 / in;
|
|
|
-
|
|
|
- /* Decrement the loop counter */
|
|
|
- j--;
|
|
|
- }
|
|
|
-
|
|
|
- /* Loop over number of columns of the destination matrix */
|
|
|
- j = numCols;
|
|
|
-
|
|
|
- while (j > 0U)
|
|
|
- {
|
|
|
- /* Divide each element of the row of the destination matrix
|
|
|
- * by the pivot element */
|
|
|
- in1 = *pInT2;
|
|
|
- *pInT2++ = in1 / in;
|
|
|
-
|
|
|
- /* Decrement the loop counter */
|
|
|
- j--;
|
|
|
- }
|
|
|
+ SCALE_ROW_F16(pSrc,column,pivot,pivotRow);
|
|
|
+ SCALE_ROW_F16(pDst,0,pivot,pivotRow);
|
|
|
|
|
|
+
|
|
|
/* Replace the rows with the sum of that row and a multiple of row i
|
|
|
* so that each new element in column i above row i is zero.*/
|
|
|
|
|
|
- /* Temporary pointers for input and destination matrices */
|
|
|
- pInT1 = pIn;
|
|
|
- pInT2 = pOut;
|
|
|
-
|
|
|
- /* index used to check for pivot element */
|
|
|
- i = 0U;
|
|
|
-
|
|
|
- /* Loop over number of rows */
|
|
|
- /* to be replaced by the sum of that row and a multiple of row i */
|
|
|
- k = numRows;
|
|
|
-
|
|
|
- while (k > 0U)
|
|
|
+ rowNb = 0;
|
|
|
+ for (;rowNb < pivotRow; rowNb++)
|
|
|
{
|
|
|
- /* Check for the pivot element */
|
|
|
- if (i == l)
|
|
|
- {
|
|
|
- /* If the processing element is the pivot element,
|
|
|
- only the columns to the right are to be processed */
|
|
|
- pInT1 += numCols - l;
|
|
|
-
|
|
|
- pInT2 += numCols;
|
|
|
- }
|
|
|
- else
|
|
|
- {
|
|
|
- /* Element of the reference row */
|
|
|
- in = *pInT1;
|
|
|
-
|
|
|
- /* Working pointers for input and destination pivot rows */
|
|
|
- pPRT_in = pPivotRowIn;
|
|
|
- pPRT_pDst = pPivotRowDst;
|
|
|
-
|
|
|
- /* Loop over the number of columns to the right of the pivot element,
|
|
|
- to replace the elements in the input matrix */
|
|
|
- j = (numCols - l);
|
|
|
-
|
|
|
- while (j > 0U)
|
|
|
- {
|
|
|
- /* Replace the element by the sum of that row
|
|
|
- and a multiple of the reference row */
|
|
|
- in1 = *pInT1;
|
|
|
- *pInT1++ = (_Float16)in1 - ((_Float16)in * (_Float16)*pPRT_in++);
|
|
|
-
|
|
|
- /* Decrement the loop counter */
|
|
|
- j--;
|
|
|
- }
|
|
|
-
|
|
|
- /* Loop over the number of columns to
|
|
|
- replace the elements in the destination matrix */
|
|
|
- j = numCols;
|
|
|
+ pTmp = ELEM(pSrc,rowNb,column) ;
|
|
|
+ pivot = *pTmp;
|
|
|
|
|
|
- while (j > 0U)
|
|
|
- {
|
|
|
- /* Replace the element by the sum of that row
|
|
|
- and a multiple of the reference row */
|
|
|
- in1 = *pInT2;
|
|
|
- *pInT2++ = (_Float16)in1 - ((_Float16)in * (_Float16)*pPRT_pDst++);
|
|
|
+ MAS_ROW_F16(column,pSrc,rowNb,pivot,pSrc,pivotRow);
|
|
|
+ MAS_ROW_F16(0 ,pDst,rowNb,pivot,pDst,pivotRow);
|
|
|
|
|
|
- /* Decrement loop counter */
|
|
|
- j--;
|
|
|
- }
|
|
|
|
|
|
- }
|
|
|
+ }
|
|
|
|
|
|
- /* Increment temporary input pointer */
|
|
|
- pInT1 = pInT1 + l;
|
|
|
+ for (rowNb = pivotRow + 1; rowNb < numRows; rowNb++)
|
|
|
+ {
|
|
|
+ pTmp = ELEM(pSrc,rowNb,column) ;
|
|
|
+ pivot = *pTmp;
|
|
|
|
|
|
- /* Decrement loop counter */
|
|
|
- k--;
|
|
|
+ MAS_ROW_F16(column,pSrc,rowNb,pivot,pSrc,pivotRow);
|
|
|
+ MAS_ROW_F16(0 ,pDst,rowNb,pivot,pDst,pivotRow);
|
|
|
|
|
|
- /* Increment pivot index */
|
|
|
- i++;
|
|
|
}
|
|
|
|
|
|
- /* Increment the input pointer */
|
|
|
- pIn++;
|
|
|
-
|
|
|
- /* Decrement the loop counter */
|
|
|
- loopCnt--;
|
|
|
-
|
|
|
- /* Increment the index modifier */
|
|
|
- l++;
|
|
|
}
|
|
|
|
|
|
/* Set status as ARM_MATH_SUCCESS */
|
|
|
status = ARM_MATH_SUCCESS;
|
|
|
|
|
|
- if ((flag != 1U) && ((_Float16)in == 0.0f16))
|
|
|
+ if ((flag != 1U) && ((_Float16)pivot == 0.0f16))
|
|
|
{
|
|
|
pIn = pSrc->pData;
|
|
|
for (i = 0; i < numRows * numCols; i++)
|
|
|
@@ -881,8 +261,6 @@ arm_status arm_mat_inverse_f16(
|
|
|
/* Return to application */
|
|
|
return (status);
|
|
|
}
|
|
|
-#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
|
|
|
-
|
|
|
/**
|
|
|
@} end of MatrixInv group
|
|
|
*/
|