matrix_functions_f16.h 10.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261
  1. /******************************************************************************
  2. * @file matrix_functions_f16.h
  3. * @brief Public header file for CMSIS DSP Library
  4. * @version V1.10.0
  5. * @date 08 July 2021
  6. * Target Processor: Cortex-M and Cortex-A cores
  7. ******************************************************************************/
  8. /*
  9. * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
  10. *
  11. * SPDX-License-Identifier: Apache-2.0
  12. *
  13. * Licensed under the Apache License, Version 2.0 (the License); you may
  14. * not use this file except in compliance with the License.
  15. * You may obtain a copy of the License at
  16. *
  17. * www.apache.org/licenses/LICENSE-2.0
  18. *
  19. * Unless required by applicable law or agreed to in writing, software
  20. * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  21. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  22. * See the License for the specific language governing permissions and
  23. * limitations under the License.
  24. */
  25. #ifndef _MATRIX_FUNCTIONS_F16_H_
  26. #define _MATRIX_FUNCTIONS_F16_H_
  27. #ifdef __cplusplus
  28. extern "C"
  29. {
  30. #endif
  31. #include "arm_math_types_f16.h"
  32. #include "arm_math_memory.h"
  33. #include "dsp/none.h"
  34. #include "dsp/utils.h"
  35. #if defined(ARM_FLOAT16_SUPPORTED)
  36. #define DEFAULT_HOUSEHOLDER_THRESHOLD_F16 (1.0e-3f)
  37. /**
  38. * @brief Instance structure for the floating-point matrix structure.
  39. */
  40. typedef struct
  41. {
  42. uint16_t numRows; /**< number of rows of the matrix. */
  43. uint16_t numCols; /**< number of columns of the matrix. */
  44. float16_t *pData; /**< points to the data of the matrix. */
  45. } arm_matrix_instance_f16;
  46. /**
  47. * @brief Floating-point matrix addition.
  48. * @param[in] pSrcA points to the first input matrix structure
  49. * @param[in] pSrcB points to the second input matrix structure
  50. * @param[out] pDst points to output matrix structure
  51. * @return The function returns either
  52. * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
  53. */
  54. arm_status arm_mat_add_f16(
  55. const arm_matrix_instance_f16 * pSrcA,
  56. const arm_matrix_instance_f16 * pSrcB,
  57. arm_matrix_instance_f16 * pDst);
  58. /**
  59. * @brief Floating-point, complex, matrix multiplication.
  60. * @param[in] pSrcA points to the first input matrix structure
  61. * @param[in] pSrcB points to the second input matrix structure
  62. * @param[out] pDst points to output matrix structure
  63. * @return The function returns either
  64. * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
  65. */
  66. arm_status arm_mat_cmplx_mult_f16(
  67. const arm_matrix_instance_f16 * pSrcA,
  68. const arm_matrix_instance_f16 * pSrcB,
  69. arm_matrix_instance_f16 * pDst);
  70. /**
  71. * @brief Floating-point matrix transpose.
  72. * @param[in] pSrc points to the input matrix
  73. * @param[out] pDst points to the output matrix
  74. * @return The function returns either <code>ARM_MATH_SIZE_MISMATCH</code>
  75. * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
  76. */
  77. arm_status arm_mat_trans_f16(
  78. const arm_matrix_instance_f16 * pSrc,
  79. arm_matrix_instance_f16 * pDst);
  80. /**
  81. * @brief Floating-point complex matrix transpose.
  82. * @param[in] pSrc points to the input matrix
  83. * @param[out] pDst points to the output matrix
  84. * @return The function returns either <code>ARM_MATH_SIZE_MISMATCH</code>
  85. * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
  86. */
  87. arm_status arm_mat_cmplx_trans_f16(
  88. const arm_matrix_instance_f16 * pSrc,
  89. arm_matrix_instance_f16 * pDst);
  90. /**
  91. * @brief Floating-point matrix multiplication
  92. * @param[in] pSrcA points to the first input matrix structure
  93. * @param[in] pSrcB points to the second input matrix structure
  94. * @param[out] pDst points to output matrix structure
  95. * @return The function returns either
  96. * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
  97. */
  98. arm_status arm_mat_mult_f16(
  99. const arm_matrix_instance_f16 * pSrcA,
  100. const arm_matrix_instance_f16 * pSrcB,
  101. arm_matrix_instance_f16 * pDst);
  102. /**
  103. * @brief Floating-point matrix and vector multiplication
  104. * @param[in] pSrcMat points to the input matrix structure
  105. * @param[in] pVec points to vector
  106. * @param[out] pDst points to output vector
  107. */
  108. void arm_mat_vec_mult_f16(
  109. const arm_matrix_instance_f16 *pSrcMat,
  110. const float16_t *pVec,
  111. float16_t *pDst);
  112. /**
  113. * @brief Floating-point matrix subtraction
  114. * @param[in] pSrcA points to the first input matrix structure
  115. * @param[in] pSrcB points to the second input matrix structure
  116. * @param[out] pDst points to output matrix structure
  117. * @return The function returns either
  118. * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
  119. */
  120. arm_status arm_mat_sub_f16(
  121. const arm_matrix_instance_f16 * pSrcA,
  122. const arm_matrix_instance_f16 * pSrcB,
  123. arm_matrix_instance_f16 * pDst);
  124. /**
  125. * @brief Floating-point matrix scaling.
  126. * @param[in] pSrc points to the input matrix
  127. * @param[in] scale scale factor
  128. * @param[out] pDst points to the output matrix
  129. * @return The function returns either
  130. * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
  131. */
  132. arm_status arm_mat_scale_f16(
  133. const arm_matrix_instance_f16 * pSrc,
  134. float16_t scale,
  135. arm_matrix_instance_f16 * pDst);
  136. /**
  137. * @brief Floating-point matrix initialization.
  138. * @param[in,out] S points to an instance of the floating-point matrix structure.
  139. * @param[in] nRows number of rows in the matrix.
  140. * @param[in] nColumns number of columns in the matrix.
  141. * @param[in] pData points to the matrix data array.
  142. */
  143. void arm_mat_init_f16(
  144. arm_matrix_instance_f16 * S,
  145. uint16_t nRows,
  146. uint16_t nColumns,
  147. float16_t * pData);
  148. /**
  149. * @brief Floating-point matrix inverse.
  150. * @param[in] src points to the instance of the input floating-point matrix structure.
  151. * @param[out] dst points to the instance of the output floating-point matrix structure.
  152. * @return The function returns ARM_MATH_SIZE_MISMATCH, if the dimensions do not match.
  153. * If the input matrix is singular (does not have an inverse), then the algorithm terminates and returns error status ARM_MATH_SINGULAR.
  154. */
  155. arm_status arm_mat_inverse_f16(
  156. const arm_matrix_instance_f16 * src,
  157. arm_matrix_instance_f16 * dst);
  158. /**
  159. * @brief Floating-point Cholesky decomposition of Symmetric Positive Definite Matrix.
  160. * @param[in] src points to the instance of the input floating-point matrix structure.
  161. * @param[out] dst points to the instance of the output floating-point matrix structure.
  162. * @return The function returns ARM_MATH_SIZE_MISMATCH, if the dimensions do not match.
  163. * If the input matrix does not have a decomposition, then the algorithm terminates and returns error status ARM_MATH_DECOMPOSITION_FAILURE.
  164. * If the matrix is ill conditioned or only semi-definite, then it is better using the LDL^t decomposition.
  165. * The decomposition is returning a lower triangular matrix.
  166. */
  167. arm_status arm_mat_cholesky_f16(
  168. const arm_matrix_instance_f16 * src,
  169. arm_matrix_instance_f16 * dst);
  170. /**
  171. * @brief Solve UT . X = A where UT is an upper triangular matrix
  172. * @param[in] ut The upper triangular matrix
  173. * @param[in] a The matrix a
  174. * @param[out] dst The solution X of UT . X = A
  175. * @return The function returns ARM_MATH_SINGULAR, if the system can't be solved.
  176. */
  177. arm_status arm_mat_solve_upper_triangular_f16(
  178. const arm_matrix_instance_f16 * ut,
  179. const arm_matrix_instance_f16 * a,
  180. arm_matrix_instance_f16 * dst);
  181. /**
  182. * @brief Solve LT . X = A where LT is a lower triangular matrix
  183. * @param[in] lt The lower triangular matrix
  184. * @param[in] a The matrix a
  185. * @param[out] dst The solution X of LT . X = A
  186. * @return The function returns ARM_MATH_SINGULAR, if the system can't be solved.
  187. */
  188. arm_status arm_mat_solve_lower_triangular_f16(
  189. const arm_matrix_instance_f16 * lt,
  190. const arm_matrix_instance_f16 * a,
  191. arm_matrix_instance_f16 * dst);
  192. /**
  193. @brief QR decomposition of a m x n floating point matrix with m >= n.
  194. @param[in] pSrc points to input matrix structure. The source matrix is modified by the function.
  195. @param[in] threshold norm2 threshold.
  196. @param[out] pOutR points to output R matrix structure of dimension m x n
  197. @param[out] pOutQ points to output Q matrix structure of dimension m x m
  198. @param[out] pOutTau points to Householder scaling factors of dimension n
  199. @param[inout] pTmpA points to a temporary vector of dimension m.
  200. @param[inout] pTmpB points to a temporary vector of dimension n.
  201. @return execution status
  202. - \ref ARM_MATH_SUCCESS : Operation successful
  203. - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
  204. - \ref ARM_MATH_SINGULAR : Input matrix is found to be singular (non-invertible)
  205. */
  206. arm_status arm_mat_qr_f16(
  207. const arm_matrix_instance_f16 * pSrc,
  208. const float16_t threshold,
  209. arm_matrix_instance_f16 * pOutR,
  210. arm_matrix_instance_f16 * pOutQ,
  211. float16_t * pOutTau,
  212. float16_t *pTmpA,
  213. float16_t *pTmpB
  214. );
  215. /**
  216. @brief Householder transform of a half floating point vector.
  217. @param[in] pSrc points to the input vector.
  218. @param[in] threshold norm2 threshold.
  219. @param[in] blockSize dimension of the vector space.
  220. @param[outQ] pOut points to the output vector.
  221. @return beta return the scaling factor beta
  222. */
  223. float16_t arm_householder_f16(
  224. const float16_t * pSrc,
  225. const float16_t threshold,
  226. uint32_t blockSize,
  227. float16_t * pOut
  228. );
  229. #endif /*defined(ARM_FLOAT16_SUPPORTED)*/
  230. #ifdef __cplusplus
  231. }
  232. #endif
  233. #endif /* ifndef _MATRIX_FUNCTIONS_F16_H_ */