arm_mat_trans_f16.c 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202
  1. /* ----------------------------------------------------------------------
  2. * Project: CMSIS DSP Library
  3. * Title: arm_mat_trans_f16.c
  4. * Description: Floating-point matrix transpose
  5. *
  6. * $Date: 18. March 2020
  7. * $Revision: V1.6.0
  8. *
  9. * Target Processor: Cortex-M cores
  10. * -------------------------------------------------------------------- */
  11. /*
  12. * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
  13. *
  14. * SPDX-License-Identifier: Apache-2.0
  15. *
  16. * Licensed under the Apache License, Version 2.0 (the License); you may
  17. * not use this file except in compliance with the License.
  18. * You may obtain a copy of the License at
  19. *
  20. * www.apache.org/licenses/LICENSE-2.0
  21. *
  22. * Unless required by applicable law or agreed to in writing, software
  23. * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  24. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  25. * See the License for the specific language governing permissions and
  26. * limitations under the License.
  27. */
  28. #include "dsp/matrix_functions_f16.h"
  29. #if defined(ARM_FLOAT16_SUPPORTED)
  30. /**
  31. @ingroup groupMatrix
  32. */
  33. /**
  34. @addtogroup MatrixTrans
  35. @{
  36. */
  37. /**
  38. @brief Floating-point matrix transpose.
  39. @param[in] pSrc points to input matrix
  40. @param[out] pDst points to output matrix
  41. @return execution status
  42. - \ref ARM_MATH_SUCCESS : Operation successful
  43. - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
  44. */
  45. #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
  46. #include "arm_helium_utils.h"
  47. arm_status arm_mat_trans_f16(
  48. const arm_matrix_instance_f16 * pSrc,
  49. arm_matrix_instance_f16 * pDst)
  50. {
  51. arm_status status; /* status of matrix transpose */
  52. #ifdef ARM_MATH_MATRIX_CHECK
  53. /* Check for matrix mismatch condition */
  54. if ((pSrc->numRows != pDst->numCols) ||
  55. (pSrc->numCols != pDst->numRows) )
  56. {
  57. /* Set status as ARM_MATH_SIZE_MISMATCH */
  58. status = ARM_MATH_SIZE_MISMATCH;
  59. }
  60. else
  61. #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
  62. {
  63. if (pDst->numRows == pDst->numCols)
  64. {
  65. if (pDst->numCols == 1)
  66. {
  67. pDst->pData[0] = pSrc->pData[0];
  68. return(ARM_MATH_SUCCESS);
  69. }
  70. if (pDst->numCols == 2)
  71. return arm_mat_trans_16bit_2x2((uint16_t *)pSrc->pData, (uint16_t *)pDst->pData);
  72. if (pDst->numCols == 3)
  73. return arm_mat_trans_16bit_3x3_mve((uint16_t *)pSrc->pData, (uint16_t *)pDst->pData);
  74. if (pDst->numCols == 4)
  75. return arm_mat_trans_16bit_4x4_mve((uint16_t *)pSrc->pData, (uint16_t *)pDst->pData);
  76. }
  77. arm_mat_trans_16bit_generic(pSrc->numRows, pSrc->numCols, (uint16_t *)pSrc->pData, (uint16_t *)pDst->pData);
  78. /* Set status as ARM_MATH_SUCCESS */
  79. status = ARM_MATH_SUCCESS;
  80. }
  81. /* Return to application */
  82. return (status);
  83. }
  84. #else
  85. arm_status arm_mat_trans_f16(
  86. const arm_matrix_instance_f16 * pSrc,
  87. arm_matrix_instance_f16 * pDst)
  88. {
  89. float16_t *pIn = pSrc->pData; /* input data matrix pointer */
  90. float16_t *pOut = pDst->pData; /* output data matrix pointer */
  91. float16_t *px; /* Temporary output data matrix pointer */
  92. uint16_t nRows = pSrc->numRows; /* number of rows */
  93. uint16_t nCols = pSrc->numCols; /* number of columns */
  94. uint32_t col, row = nRows, i = 0U; /* Loop counters */
  95. arm_status status; /* status of matrix transpose */
  96. #ifdef ARM_MATH_MATRIX_CHECK
  97. /* Check for matrix mismatch condition */
  98. if ((pSrc->numRows != pDst->numCols) ||
  99. (pSrc->numCols != pDst->numRows) )
  100. {
  101. /* Set status as ARM_MATH_SIZE_MISMATCH */
  102. status = ARM_MATH_SIZE_MISMATCH;
  103. }
  104. else
  105. #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
  106. {
  107. /* Matrix transpose by exchanging the rows with columns */
  108. /* row loop */
  109. do
  110. {
  111. /* Pointer px is set to starting address of column being processed */
  112. px = pOut + i;
  113. #if defined (ARM_MATH_LOOPUNROLL)
  114. /* Loop unrolling: Compute 4 outputs at a time */
  115. col = nCols >> 2U;
  116. while (col > 0U) /* column loop */
  117. {
  118. /* Read and store input element in destination */
  119. *px = *pIn++;
  120. /* Update pointer px to point to next row of transposed matrix */
  121. px += nRows;
  122. *px = *pIn++;
  123. px += nRows;
  124. *px = *pIn++;
  125. px += nRows;
  126. *px = *pIn++;
  127. px += nRows;
  128. /* Decrement column loop counter */
  129. col--;
  130. }
  131. /* Loop unrolling: Compute remaining outputs */
  132. col = nCols % 0x4U;
  133. #else
  134. /* Initialize col with number of samples */
  135. col = nCols;
  136. #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
  137. while (col > 0U)
  138. {
  139. /* Read and store input element in destination */
  140. *px = *pIn++;
  141. /* Update pointer px to point to next row of transposed matrix */
  142. px += nRows;
  143. /* Decrement column loop counter */
  144. col--;
  145. }
  146. i++;
  147. /* Decrement row loop counter */
  148. row--;
  149. } while (row > 0U); /* row loop end */
  150. /* Set status as ARM_MATH_SUCCESS */
  151. status = ARM_MATH_SUCCESS;
  152. }
  153. /* Return to application */
  154. return (status);
  155. }
  156. #endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
  157. /**
  158. * @} end of MatrixTrans group
  159. */
  160. #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */