arm_mat_trans_q7.c 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171
  1. /* ----------------------------------------------------------------------
  2. * Project: CMSIS DSP Library
  3. * Title: arm_mat_trans_q7.c
  4. * Description: Q7 matrix transpose
  5. *
  6. * $Date: 23 April 2021
  7. * $Revision: V1.9.0
  8. *
  9. * Target Processor: Cortex-M and Cortex-A cores
  10. * -------------------------------------------------------------------- */
  11. /*
  12. * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  13. *
  14. * SPDX-License-Identifier: Apache-2.0
  15. *
  16. * Licensed under the Apache License, Version 2.0 (the License); you may
  17. * not use this file except in compliance with the License.
  18. * You may obtain a copy of the License at
  19. *
  20. * www.apache.org/licenses/LICENSE-2.0
  21. *
  22. * Unless required by applicable law or agreed to in writing, software
  23. * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  24. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  25. * See the License for the specific language governing permissions and
  26. * limitations under the License.
  27. */
  28. #include "dsp/matrix_functions.h"
  29. /**
  30. @ingroup groupMatrix
  31. */
  32. /**
  33. @addtogroup MatrixTrans
  34. @{
  35. */
  36. /**
  37. @brief Q7 matrix transpose.
  38. @param[in] pSrc points to input matrix
  39. @param[out] pDst points to output matrix
  40. @return execution status
  41. - \ref ARM_MATH_SUCCESS : Operation successful
  42. - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
  43. */
  44. #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
  45. arm_status arm_mat_trans_q7(const arm_matrix_instance_q7 *pSrc, arm_matrix_instance_q7 *pDst)
  46. {
  47. uint16x8_t vecOffs;
  48. uint32_t i;
  49. uint32_t blkCnt;
  50. uint8_t const *pDataC;
  51. uint8_t *pDataDestR;
  52. uint16x8_t vecIn;
  53. const uint8_t * pDataSrc=(const uint8_t *)pSrc->pData;
  54. uint8_t * pDataDst=(uint8_t *)pDst->pData;
  55. #ifdef ARM_MATH_MATRIX_CHECK
  56. /* Check for matrix mismatch condition */
  57. if ((pSrc->numRows != pDst->numCols) || (pSrc->numCols != pDst->numRows))
  58. {
  59. /* Set status as ARM_MATH_SIZE_MISMATCH */
  60. return ARM_MATH_SIZE_MISMATCH;
  61. }
  62. #endif
  63. vecOffs = vidupq_u16((uint32_t)0, 1);
  64. vecOffs = vecOffs * pSrc->numCols;
  65. i = pSrc->numCols;
  66. do
  67. {
  68. pDataC = (uint8_t const *) pDataSrc;
  69. pDataDestR = (uint8_t*)pDataDst;
  70. blkCnt = pSrc->numRows >> 3;
  71. while (blkCnt > 0U)
  72. {
  73. /* widened loads */
  74. vecIn = vldrbq_gather_offset_u16(pDataC, vecOffs);
  75. vstrbq_u16(pDataDestR, vecIn);
  76. pDataDestR += 8;
  77. pDataC = pDataC + pSrc->numCols * 8;
  78. /*
  79. * Decrement the blockSize loop counter
  80. */
  81. blkCnt--;
  82. }
  83. /*
  84. * tail
  85. * (will be merged thru tail predication)
  86. */
  87. blkCnt = pSrc->numRows & 7;
  88. if (blkCnt > 0U)
  89. {
  90. mve_pred16_t p0 = vctp16q(blkCnt);
  91. vecIn = vldrbq_gather_offset_u16(pDataC, vecOffs);
  92. vstrbq_p_u16(pDataDestR, vecIn, p0);
  93. }
  94. pDataSrc += 1;
  95. pDataDst += pSrc->numRows;
  96. }
  97. while (--i);
  98. return (ARM_MATH_SUCCESS);
  99. }
  100. #else
  101. arm_status arm_mat_trans_q7(const arm_matrix_instance_q7 *pSrc, arm_matrix_instance_q7 *pDst)
  102. {
  103. q7_t *pSrcA = pSrc->pData; /* input data matrix pointer */
  104. q7_t *pOut = pDst->pData; /* output data matrix pointer */
  105. uint16_t nRows = pSrc->numRows; /* number of nRows */
  106. uint16_t nColumns = pSrc->numCols; /* number of nColumns */
  107. uint16_t col, row = nRows, i = 0U; /* row and column loop counters */
  108. arm_status status; /* status of matrix transpose */
  109. #ifdef ARM_MATH_MATRIX_CHECK
  110. /* Check for matrix mismatch condition */
  111. if ((pSrc->numRows != pDst->numCols) || (pSrc->numCols != pDst->numRows)) {
  112. /* Set status as ARM_MATH_SIZE_MISMATCH */
  113. status = ARM_MATH_SIZE_MISMATCH;
  114. } else
  115. #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
  116. {
  117. /* Matrix transpose by exchanging the rows with columns */
  118. /* row loop */
  119. do {
  120. /* The pointer pOut is set to starting address of the column being processed */
  121. pOut = pDst->pData + i;
  122. /* Initialize column loop counter */
  123. col = nColumns;
  124. while (col > 0U) {
  125. /* Read and store the input element in the destination */
  126. *pOut = *pSrcA++;
  127. /* Update the pointer pOut to point to the next row of the transposed matrix */
  128. pOut += nRows;
  129. /* Decrement the column loop counter */
  130. col--;
  131. }
  132. i++;
  133. /* Decrement the row loop counter */
  134. row--;
  135. } while (row > 0U);
  136. /* set status as ARM_MATH_SUCCESS */
  137. status = ARM_MATH_SUCCESS;
  138. }
  139. /* Return to application */
  140. return (status);
  141. }
  142. #endif /* defined(ARM_MATH_MVEI) */
  143. /**
  144. @} end of MatrixTrans group
  145. */