/*
 * Copyright (C) 2010-2022 Arm Limited or its affiliates.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* ----------------------------------------------------------------------
 * Project:      CMSIS NN Library
 * Title:        arm_nnfunctions.h
 * Description:  Public header file for CMSIS NN Library
 *
 * $Date:        19 April 2022
 * $Revision:    V.9.0.0
 *
 * Target Processor: Cortex-M CPUs
 * -------------------------------------------------------------------- */
/**
   \mainpage CMSIS NN Software Library
 *
 * Introduction
 * ------------
 *
 * This user manual describes the CMSIS NN software library,
 * a collection of efficient neural network kernels developed to maximize the
 * performance and minimize the memory footprint of neural networks on Cortex-M processor cores.
 *
 * The library is divided into a number of functions, each covering a specific category:
 * - Convolution Functions
 * - Activation Functions
 * - Fully-connected Layer Functions
 * - SVDF Layer Functions
 * - Pooling Functions
 * - Softmax Functions
 * - Basic math Functions
 *
 * The library has separate functions for operating on different weight and activation data
 * types, including 8-bit integers (q7_t) and 16-bit integers (q15_t). The description of each
 * kernel is included in its function documentation. The implementation details are also
 * described in the paper [1].
 *
 * Supported Processors
 * -------
 * CMSIS-NN targets Cortex-M processors, typically with three different implementations for each function. Each
 * targets a different group of processors.
 * - Processors without SIMD capability (e.g., Cortex-M0)
 * - Processors with DSP extension (e.g., Cortex-M4)
 * - Processors with MVE extension (e.g., Cortex-M55)
 * The right implementation is picked through feature flags, so the user usually does not have to set it explicitly.
 *
 * Function Classification
 * --------
 * The functions can be classified into two segments:
 * - Legacy functions supporting ARM's internal symmetric quantization (8 bits).
 * - Functions that support the TensorFlow Lite framework with symmetric quantization (8 bits).
 *
 * The legacy functions can be identified by their _q7 or _q15 suffix; no new development is done on them.
 * The article in [2] describes in detail how to run a network using the legacy functions.
 *
 * The functions supporting the TensorFlow Lite framework are identified by the _s8 suffix and can be invoked from
 * TFL micro. The functions are bit exact to TensorFlow Lite. Refer to TensorFlow's documentation in [3] on how to
 * run a TensorFlow Lite model using optimized CMSIS-NN kernels.
 *
 * Block Diagram
 * --------
 * \image html CMSIS-NN-OVERVIEW.PNG
 *
 * Examples
 * --------
 *
 * The library ships with a number of examples that demonstrate how to use the library functions.
 *
 * Pre-processor Macros
 * ------------
 *
 * Each library project has different pre-processor macros.
 *
 * - ARM_MATH_DSP:
 *
 * Define the macro ARM_MATH_DSP if the silicon supports DSP instructions (DSP extension).
 *
 * - ARM_MATH_MVEI:
 *
 * Define the macro ARM_MATH_MVEI if the silicon supports the M-Profile Vector Extension.
 *
 * - ARM_MATH_AUTOVECTORIZE:
 *
 * Used in conjunction with ARM_MATH_MVEI to let the compiler auto-vectorize the functions that use inline
 * assembly. It does not affect functions that use C or intrinsics.
 *
 * - ARM_MATH_BIG_ENDIAN:
 *
 * Define the macro ARM_MATH_BIG_ENDIAN to build the library for big endian targets. This is supported only for the
 * legacy functions, i.e., functions targeted at TensorFlow Lite do not support big endianness. By default the
 * library builds for little endian targets.
 *
 * - ARM_NN_TRUNCATE:
 *
 * Define the macro ARM_NN_TRUNCATE to use floor instead of round-to-the-nearest-int for the computation.
 *
 *
 * Copyright Notice
 * ------------
 *
 * Copyright (C) 2010-2019 Arm Limited. All rights reserved.
 *
 * [1] CMSIS-NN: Efficient Neural Network Kernels for Arm Cortex-M CPUs https://arxiv.org/abs/1801.06601
 *
 * [2] Converting a Neural Network for Arm Cortex-M with CMSIS-NN
 *     https://developer.arm.com/solutions/machine-learning-on-arm/developer-material/how-to-guides/converting-a-neural-network-for-arm-cortex-m-with-cmsis-nn/single-page
 * [3] https://www.tensorflow.org/lite/microcontrollers/library
 *
 * [4] https://github.com/ARM-software/CMSIS_5/tree/develop/CMSIS/NN#legacy-vs-tfl-micro-compliant-apis
 */
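
/*
 * Illustrative note (editor's sketch, not part of the original header): the
 * macros above are normally passed as compiler definitions rather than
 * defined in source. Assuming the GNU Arm toolchain, hypothetical build lines
 * for a Cortex-M4 (DSP extension) and a Cortex-M55 (MVE) target might look
 * like:
 *
 *   arm-none-eabi-gcc -mcpu=cortex-m4  -DARM_MATH_DSP  -c arm_convolve_s8.c
 *   arm-none-eabi-gcc -mcpu=cortex-m55 -DARM_MATH_MVEI -c arm_convolve_s8.c
 */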
/**
 * @defgroup groupNN Neural Network Functions
 * A collection of functions to perform basic operations for neural network layers. Functions with a _s8 suffix
 * support the TensorFlow Lite framework.
 */

#ifndef _ARM_NNFUNCTIONS_H
#define _ARM_NNFUNCTIONS_H

#include "arm_nn_math_types.h"
#include "arm_nn_types.h"

#define USE_INTRINSIC

//#define ARM_NN_TRUNCATE /* This configures the rounding mode: floor or round-to-the-nearest-int */

#ifdef __cplusplus
extern "C" {
#endif

/**
 * @brief Struct for specifying activation function types
 *
 */
typedef enum
{
    ARM_SIGMOID = 0, /**< Sigmoid activation function */
    ARM_TANH = 1,    /**< Tanh activation function */
} arm_nn_activation_type;

/**
 * @defgroup NNConv Convolution Functions
 *
 * Collection of convolution and depthwise convolution functions and their variants.
 *
 * The convolution is implemented in 2 steps: im2col and GEMM
 *
 * im2col is a process of converting each patch of image data into
 * a column. After im2col, the convolution is computed as matrix-matrix
 * multiplication.
 *
 * To reduce the memory footprint, the im2col is performed partially.
 * In each iteration, only a few columns (i.e., patches) are generated and
 * computed with GEMM kernels similar to the CMSIS-DSP arm_mat_mult functions.
 *
 */
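
/*
 * Minimal im2col sketch (editor's illustration, not a CMSIS-NN API): for one
 * output pixel (oy, ox) the kh x kw x ch_in input patch is copied into a
 * single column, so the convolution reduces to one dot product per output
 * channel. The NHWC layout and all names below are assumptions made for
 * illustration only.
 *
 *   static void im2col_one_patch(const int8_t *in, int h, int w, int ch_in,
 *                                int kh, int kw, int stride, int pad,
 *                                int oy, int ox, int8_t pad_value, int8_t *col)
 *   {
 *       int idx = 0;
 *       for (int ky = 0; ky < kh; ky++) {
 *           for (int kx = 0; kx < kw; kx++) {
 *               const int iy = oy * stride - pad + ky;
 *               const int ix = ox * stride - pad + kx;
 *               for (int c = 0; c < ch_in; c++) {
 *                   // Taps that fall outside the image read as the padding value
 *                   col[idx++] = (iy < 0 || iy >= h || ix < 0 || ix >= w)
 *                                    ? pad_value
 *                                    : in[(iy * w + ix) * ch_in + c];
 *               }
 *           }
 *       }
 *   }
 */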
/**
 * @brief s8 convolution layer wrapper function whose main purpose is to call the optimal kernel available in
 *        CMSIS-NN to perform the convolution.
 *
 * @param[in, out] ctx            Function context that contains the additional buffer if required by the function.
 *                                arm_convolve_wrapper_s8_get_buffer_size will return the buffer_size if required
 * @param[in]      conv_params    Convolution parameters (e.g. strides, dilations, pads,...).
 *                                Range of conv_params->input_offset  : [-127, 128]
 *                                Range of conv_params->output_offset : [-128, 127]
 * @param[in]      quant_params   Per-channel quantization info.
 *                                It contains the multiplier and shift values to be applied to each output channel
 * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
 * @param[in]      input_data     Input (activation) data pointer. Data type: int8
 * @param[in]      filter_dims    Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the
 *                                spatial filter dimensions
 * @param[in]      filter_data    Filter data pointer. Data type: int8
 * @param[in]      bias_dims      Bias tensor dimensions. Format: [C_OUT]
 * @param[in]      bias_data      Bias data pointer. Data type: int32
 * @param[in]      output_dims    Output tensor dimensions. Format: [N, H, W, C_OUT]
 * @param[out]     output_data    Output data pointer. Data type: int8
 *
 * @return     The function returns either
 *             <code>ARM_MATH_SIZE_MISMATCH</code> if argument constraints fail or
 *             <code>ARM_MATH_SUCCESS</code> on successful completion.
 *
 */
arm_status arm_convolve_wrapper_s8(const cmsis_nn_context *ctx,
                                   const cmsis_nn_conv_params *conv_params,
                                   const cmsis_nn_per_channel_quant_params *quant_params,
                                   const cmsis_nn_dims *input_dims,
                                   const q7_t *input_data,
                                   const cmsis_nn_dims *filter_dims,
                                   const q7_t *filter_data,
                                   const cmsis_nn_dims *bias_dims,
                                   const int32_t *bias_data,
                                   const cmsis_nn_dims *output_dims,
                                   q7_t *output_data);

/**
 * @brief Get the required buffer size for arm_convolve_wrapper_s8
 *
 * @param[in]      conv_params    Convolution parameters (e.g. strides, dilations, pads,...).
 *                                Range of conv_params->input_offset  : [-127, 128]
 *                                Range of conv_params->output_offset : [-128, 127]
 * @param[in]      input_dims     Input (activation) dimensions. Format: [N, H, W, C_IN]
 * @param[in]      filter_dims    Filter dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial
 *                                filter dimensions
 * @param[in]      output_dims    Output tensor dimensions. Format: [N, H, W, C_OUT]
 *
 * @return         The function returns the required buffer size (bytes)
 *
 */
int32_t arm_convolve_wrapper_s8_get_buffer_size(const cmsis_nn_conv_params *conv_params,
                                                const cmsis_nn_dims *input_dims,
                                                const cmsis_nn_dims *filter_dims,
                                                const cmsis_nn_dims *output_dims);
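
/*
 * Usage sketch (editor's illustration): query the scratch buffer size first,
 * then call the wrapper. All dimension, offset and quantization values below
 * are hypothetical, and the struct field names are assumed to match the
 * definitions in arm_nn_types.h for this library version.
 *
 *   const cmsis_nn_dims input_dims  = {.n = 1, .h = 32, .w = 32, .c = 3};
 *   const cmsis_nn_dims filter_dims = {.n = 16, .h = 3, .w = 3, .c = 3};   // [C_OUT, HK, WK, C_IN]
 *   const cmsis_nn_dims bias_dims   = {.n = 1, .h = 1, .w = 1, .c = 16};
 *   const cmsis_nn_dims output_dims = {.n = 1, .h = 32, .w = 32, .c = 16};
 *
 *   const cmsis_nn_conv_params conv_params = {
 *       .input_offset = 128,                    // within [-127, 128]
 *       .output_offset = 0,                     // within [-128, 127]
 *       .stride = {.w = 1, .h = 1},
 *       .padding = {.w = 1, .h = 1},
 *       .dilation = {.w = 1, .h = 1},
 *       .activation = {.min = -128, .max = 127}};
 *
 *   const cmsis_nn_per_channel_quant_params quant_params = {
 *       .multiplier = out_multiplier,           // int32_t[16], one entry per output channel
 *       .shift = out_shift};                    // int32_t[16]
 *
 *   cmsis_nn_context ctx;
 *   ctx.size = arm_convolve_wrapper_s8_get_buffer_size(&conv_params, &input_dims,
 *                                                      &filter_dims, &output_dims);
 *   ctx.buf = (ctx.size > 0) ? malloc(ctx.size) : NULL;   // needs <stdlib.h>
 *
 *   arm_status status = arm_convolve_wrapper_s8(&ctx, &conv_params, &quant_params,
 *                                               &input_dims, input_data, &filter_dims, filter_data,
 *                                               &bias_dims, bias_data, &output_dims, output_data);
 */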
/**
 * @brief s16 convolution layer wrapper function whose main purpose is to call the optimal kernel available in
 *        CMSIS-NN to perform the convolution.
 *
 * @param[in, out] ctx            Function context that contains the additional buffer if required by the function.
 *                                arm_convolve_wrapper_s16_get_buffer_size will return the buffer_size if required
 * @param[in]      conv_params    Convolution parameters (e.g. strides, dilations, pads,...).
 *                                conv_params->input_offset  : Not used
 *                                conv_params->output_offset : Not used
 * @param[in]      quant_params   Per-channel quantization info.
 *                                It contains the multiplier and shift values to be applied to each output channel
 * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
 * @param[in]      input_data     Input (activation) data pointer. Data type: int16
 * @param[in]      filter_dims    Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the
 *                                spatial filter dimensions
 * @param[in]      filter_data    Filter data pointer. Data type: int8
 * @param[in]      bias_dims      Bias tensor dimensions. Format: [C_OUT]
 * @param[in]      bias_data      Bias data pointer. Data type: int64
 * @param[in]      output_dims    Output tensor dimensions. Format: [N, H, W, C_OUT]
 * @param[out]     output_data    Output data pointer. Data type: int16
 *
 * @return     The function returns either
 *             <code>ARM_MATH_SIZE_MISMATCH</code> if argument constraints fail or
 *             <code>ARM_MATH_SUCCESS</code> on successful completion.
 *
 */
arm_status arm_convolve_wrapper_s16(const cmsis_nn_context *ctx,
                                    const cmsis_nn_conv_params *conv_params,
                                    const cmsis_nn_per_channel_quant_params *quant_params,
                                    const cmsis_nn_dims *input_dims,
                                    const q15_t *input_data,
                                    const cmsis_nn_dims *filter_dims,
                                    const q7_t *filter_data,
                                    const cmsis_nn_dims *bias_dims,
                                    const int64_t *bias_data,
                                    const cmsis_nn_dims *output_dims,
                                    q15_t *output_data);

/**
 * @brief Get the required buffer size for arm_convolve_wrapper_s16
 *
 * @param[in]      conv_params    Convolution parameters (e.g. strides, dilations, pads,...).
 *                                conv_params->input_offset  : Not used
 *                                conv_params->output_offset : Not used
 * @param[in]      input_dims     Input (activation) dimensions. Format: [N, H, W, C_IN]
 * @param[in]      filter_dims    Filter dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial
 *                                filter dimensions
 * @param[in]      output_dims    Output tensor dimensions. Format: [N, H, W, C_OUT]
 *
 * @return         The function returns the required buffer size (bytes)
 *
 */
int32_t arm_convolve_wrapper_s16_get_buffer_size(const cmsis_nn_conv_params *conv_params,
                                                 const cmsis_nn_dims *input_dims,
                                                 const cmsis_nn_dims *filter_dims,
                                                 const cmsis_nn_dims *output_dims);
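
/*
 * Usage note (editor's sketch): the s16 wrapper follows the same calling
 * pattern as arm_convolve_wrapper_s8 above, but takes q15_t activations and
 * int64_t bias data, and its input/output offsets are unused:
 *
 *   ctx.size = arm_convolve_wrapper_s16_get_buffer_size(&conv_params, &input_dims,
 *                                                       &filter_dims, &output_dims);
 *   ctx.buf  = (ctx.size > 0) ? malloc(ctx.size) : NULL;
 *   status   = arm_convolve_wrapper_s16(&ctx, &conv_params, &quant_params,
 *                                       &input_dims, input_q15, &filter_dims, filter_q7,
 *                                       &bias_dims, bias_i64, &output_dims, output_q15);
 */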
/**
 * @brief Basic s8 convolution function
 * @param[in, out] ctx            Function context that contains the additional buffer if required by the function.
 *                                arm_convolve_s8_get_buffer_size will return the buffer_size if required
 * @param[in]      conv_params    Convolution parameters (e.g. strides, dilations, pads,...).
 *                                Range of conv_params->input_offset  : [-127, 128]
 *                                Range of conv_params->output_offset : [-128, 127]
 * @param[in]      quant_params   Per-channel quantization info.
 *                                It contains the multiplier and shift values to be applied to each output channel
 * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
 * @param[in]      input_data     Input (activation) data pointer. Data type: int8
 * @param[in]      filter_dims    Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the
 *                                spatial filter dimensions
 * @param[in]      filter_data    Filter data pointer. Data type: int8
 * @param[in]      bias_dims      Bias tensor dimensions. Format: [C_OUT]
 * @param[in]      bias_data      Optional bias data pointer. Data type: int32
 * @param[in]      output_dims    Output tensor dimensions. Format: [N, H, W, C_OUT]
 * @param[out]     output_data    Output data pointer. Data type: int8
 * @return     The function returns <code>ARM_MATH_SUCCESS</code>
 *
 * @details
 *   1. Supported framework: TensorFlow Lite micro
 *   2. q7 is used as the data type even though it is s8 data. This is done to be consistent with existing APIs.
 *   3. Additional memory is required for optimization. Refer to argument 'ctx' for details.
 *
 */
arm_status arm_convolve_s8(const cmsis_nn_context *ctx,
                           const cmsis_nn_conv_params *conv_params,
                           const cmsis_nn_per_channel_quant_params *quant_params,
                           const cmsis_nn_dims *input_dims,
                           const q7_t *input_data,
                           const cmsis_nn_dims *filter_dims,
                           const q7_t *filter_data,
                           const cmsis_nn_dims *bias_dims,
                           const int32_t *bias_data,
                           const cmsis_nn_dims *output_dims,
                           q7_t *output_data);

/**
 * @brief Get the required buffer size for the s8 convolution function
 *
 * @param[in]    input_dims     Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
 * @param[in]    filter_dims    Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK
 *                              are the spatial filter dimensions
 * @return       The function returns the required buffer size (bytes)
 *
 */
int32_t arm_convolve_s8_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims);
/**
 * @brief Basic s16 convolution function
 * @param[in, out] ctx            Function context that contains the additional buffer if required by the function.
 *                                arm_convolve_s16_get_buffer_size will return the buffer_size if required
 * @param[in]      conv_params    Convolution parameters (e.g. strides, dilations, pads,...).
 *                                conv_params->input_offset  : Not used
 *                                conv_params->output_offset : Not used
 * @param[in]      quant_params   Per-channel quantization info.
 *                                It contains the multiplier and shift values to be applied to each output channel
 * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
 * @param[in]      input_data     Input (activation) data pointer. Data type: int16
 * @param[in]      filter_dims    Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the
 *                                spatial filter dimensions
 * @param[in]      filter_data    Filter data pointer. Data type: int8
 * @param[in]      bias_dims      Bias tensor dimensions. Format: [C_OUT]
 * @param[in]      bias_data      Optional bias data pointer. Data type: int64
 * @param[in]      output_dims    Output tensor dimensions. Format: [N, H, W, C_OUT]
 * @param[out]     output_data    Output data pointer. Data type: int16
 * @return     The function returns <code>ARM_MATH_SUCCESS</code>
 *
 * @details
 *   1. Supported framework: TensorFlow Lite micro
 *   2. q7/q15 is used as the data type even though it is s8/s16 data. This is done to be consistent with existing
 *      APIs.
 *   3. Additional memory is required for optimization. Refer to argument 'ctx' for details.
 *
 */
arm_status arm_convolve_s16(const cmsis_nn_context *ctx,
                            const cmsis_nn_conv_params *conv_params,
                            const cmsis_nn_per_channel_quant_params *quant_params,
                            const cmsis_nn_dims *input_dims,
                            const q15_t *input_data,
                            const cmsis_nn_dims *filter_dims,
                            const q7_t *filter_data,
                            const cmsis_nn_dims *bias_dims,
                            const int64_t *bias_data,
                            const cmsis_nn_dims *output_dims,
                            q15_t *output_data);

/**
 * @brief Optimized s16 convolution function
 * @param[in, out] ctx            Function context that contains the additional buffer if required by the function.
 *                                arm_convolve_fast_s16_get_buffer_size will return the buffer_size if required
 * @param[in]      conv_params    Convolution parameters (e.g. strides, dilations, pads,...).
 *                                conv_params->input_offset  : Not used
 *                                conv_params->output_offset : Not used
 * @param[in]      quant_params   Per-channel quantization info.
 *                                It contains the multiplier and shift values to be applied to each output channel
 * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
 * @param[in]      input_data     Input (activation) data pointer. Data type: int16
 * @param[in]      filter_dims    Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the
 *                                spatial filter dimensions. (filter_dims->w * filter_dims->h * input_dims->c) must
 *                                not exceed 512
 * @param[in]      filter_data    Filter data pointer. Data type: int8
 * @param[in]      bias_dims      Bias tensor dimensions. Format: [C_OUT]
 * @param[in]      bias_data      Optional bias data pointer. Data type: int64
 * @param[in]      output_dims    Output tensor dimensions. Format: [N, H, W, C_OUT]
 * @param[out]     output_data    Output data pointer. Data type: int16
 * @return     The function returns <code>ARM_MATH_SUCCESS</code>
 *
 * @details
 *   1. Supported framework: TensorFlow Lite micro
 *   2. q7/q15 is used as the data type even though it is s8/s16 data. This is done to be consistent with existing
 *      APIs.
 *   3. Additional memory is required for optimization. Refer to argument 'ctx' for details.
 *   4. The implementation supports kernel volumes (filter width * filter height * input channels) of up to 512.
 *
 */
arm_status arm_convolve_fast_s16(const cmsis_nn_context *ctx,
                                 const cmsis_nn_conv_params *conv_params,
                                 const cmsis_nn_per_channel_quant_params *quant_params,
                                 const cmsis_nn_dims *input_dims,
                                 const q15_t *input_data,
                                 const cmsis_nn_dims *filter_dims,
                                 const q7_t *filter_data,
                                 const cmsis_nn_dims *bias_dims,
                                 const int64_t *bias_data,
                                 const cmsis_nn_dims *output_dims,
                                 q15_t *output_data);

/**
 * @brief Get the required buffer size for the s16 convolution function
 *
 * @param[in]    input_dims     Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
 * @param[in]    filter_dims    Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK
 *                              are the spatial filter dimensions
 * @return       The function returns the required buffer size (bytes)
 *
 */
int32_t arm_convolve_s16_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims);

/**
 * @brief Get the required buffer size for the fast s16 convolution function
 *
 * @param[in]    input_dims     Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
 * @param[in]    filter_dims    Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK
 *                              are the spatial filter dimensions
 * @return       The function returns the required buffer size (bytes)
 *
 */
int32_t arm_convolve_fast_s16_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims);
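
/*
 * Dispatch sketch (editor's illustration): arm_convolve_fast_s16() only
 * supports kernel volumes (filter width * filter height * input channels)
 * up to 512, so a caller can guard on that and fall back to the basic s16
 * kernel. Variable names are hypothetical; ctx.buf is allocated from
 * ctx.size as in the earlier wrapper example.
 *
 *   const int32_t kernel_volume = filter_dims.w * filter_dims.h * input_dims.c;
 *   if (kernel_volume <= 512) {
 *       ctx.size = arm_convolve_fast_s16_get_buffer_size(&input_dims, &filter_dims);
 *       status = arm_convolve_fast_s16(&ctx, &conv_params, &quant_params, &input_dims, input_q15,
 *                                      &filter_dims, filter_q7, &bias_dims, bias_i64,
 *                                      &output_dims, output_q15);
 *   } else {
 *       ctx.size = arm_convolve_s16_get_buffer_size(&input_dims, &filter_dims);
 *       status = arm_convolve_s16(&ctx, &conv_params, &quant_params, &input_dims, input_q15,
 *                                 &filter_dims, filter_q7, &bias_dims, bias_i64,
 *                                 &output_dims, output_q15);
 *   }
 */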
/**
 * @brief Basic Q7 convolution function
 * @param[in]       Im_in       pointer to input tensor
 * @param[in]       dim_im_in   input tensor dimension
 * @param[in]       ch_im_in    number of input tensor channels
 * @param[in]       wt          pointer to kernel weights
 * @param[in]       ch_im_out   number of filters, i.e., output tensor channels
 * @param[in]       dim_kernel  filter kernel size
 * @param[in]       padding     padding sizes
 * @param[in]       stride      convolution stride
 * @param[in]       bias        pointer to bias
 * @param[in]       bias_shift  amount of left-shift for bias
 * @param[in]       out_shift   amount of right-shift for output
 * @param[in,out]   Im_out      pointer to output tensor
 * @param[in]       dim_im_out  output tensor dimension
 * @param[in,out]   bufferA     pointer to buffer space for input
 * @param[in,out]   bufferB     pointer to buffer space for output
 * @return     The function returns <code>ARM_MATH_SUCCESS</code>
 *
 */
arm_status arm_convolve_HWC_q7_basic(const q7_t *Im_in,
                                     const uint16_t dim_im_in,
                                     const uint16_t ch_im_in,
                                     const q7_t *wt,
                                     const uint16_t ch_im_out,
                                     const uint16_t dim_kernel,
                                     const uint16_t padding,
                                     const uint16_t stride,
                                     const q7_t *bias,
                                     const uint16_t bias_shift,
                                     const uint16_t out_shift,
                                     q7_t *Im_out,
                                     const uint16_t dim_im_out,
                                     q15_t *bufferA,
                                     q7_t *bufferB);
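
/*
 * Arithmetic sketch (editor's note): in the legacy q7 kernels each output
 * element is computed roughly as below, with the bias left-shifted by
 * bias_shift, a rounding term added (omitted when ARM_NN_TRUNCATE is
 * defined), and the accumulator right-shifted by out_shift and saturated to
 * 8 bits. This is a simplified illustration, not the exact library code.
 *
 *   int32_t acc = ((int32_t)bias[co] << bias_shift) + ((1 << out_shift) >> 1);
 *   for (int k = 0; k < ch_im_in * dim_kernel * dim_kernel; k++) {
 *       acc += (int32_t)wt[co * ch_im_in * dim_kernel * dim_kernel + k] * (int32_t)patch[k];
 *   }
 *   Im_out[out_idx] = (q7_t)__SSAT(acc >> out_shift, 8);   // saturate to [-128, 127]
 */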
/**
 * @brief Basic Q7 convolution function (non-square shape)
 * @param[in]       Im_in         pointer to input tensor
 * @param[in]       dim_im_in_x   input tensor dimension x
 * @param[in]       dim_im_in_y   input tensor dimension y
 * @param[in]       ch_im_in      number of input tensor channels
 * @param[in]       wt            pointer to kernel weights
 * @param[in]       ch_im_out     number of filters, i.e., output tensor channels
 * @param[in]       dim_kernel_x  filter kernel size x
 * @param[in]       dim_kernel_y  filter kernel size y
 * @param[in]       padding_x     padding size x
 * @param[in]       padding_y     padding size y
 * @param[in]       stride_x      convolution stride x
 * @param[in]       stride_y      convolution stride y
 * @param[in]       bias          pointer to bias
 * @param[in]       bias_shift    amount of left-shift for bias
 * @param[in]       out_shift     amount of right-shift for output
 * @param[in,out]   Im_out        pointer to output tensor
 * @param[in]       dim_im_out_x  output tensor dimension x
 * @param[in]       dim_im_out_y  output tensor dimension y
 * @param[in,out]   bufferA       pointer to buffer space for input
 * @param[in,out]   bufferB       pointer to buffer space for output
 * @return     The function returns <code>ARM_MATH_SUCCESS</code>
 */
arm_status arm_convolve_HWC_q7_basic_nonsquare(const q7_t *Im_in,
                                               const uint16_t dim_im_in_x,
                                               const uint16_t dim_im_in_y,
                                               const uint16_t ch_im_in,
                                               const q7_t *wt,
                                               const uint16_t ch_im_out,
                                               const uint16_t dim_kernel_x,
                                               const uint16_t dim_kernel_y,
                                               const uint16_t padding_x,
                                               const uint16_t padding_y,
                                               const uint16_t stride_x,
                                               const uint16_t stride_y,
                                               const q7_t *bias,
                                               const uint16_t bias_shift,
                                               const uint16_t out_shift,
                                               q7_t *Im_out,
                                               const uint16_t dim_im_out_x,
                                               const uint16_t dim_im_out_y,
                                               q15_t *bufferA,
                                               q7_t *bufferB);
/**
 * @brief Basic Q15 convolution function
 * @param[in]       Im_in       pointer to input tensor
 * @param[in]       dim_im_in   input tensor dimension
 * @param[in]       ch_im_in    number of input tensor channels
 * @param[in]       wt          pointer to kernel weights
 * @param[in]       ch_im_out   number of filters, i.e., output tensor channels
 * @param[in]       dim_kernel  filter kernel size
 * @param[in]       padding     padding sizes
 * @param[in]       stride      convolution stride
 * @param[in]       bias        pointer to bias
 * @param[in]       bias_shift  amount of left-shift for bias
 * @param[in]       out_shift   amount of right-shift for output
 * @param[in,out]   Im_out      pointer to output tensor
 * @param[in]       dim_im_out  output tensor dimension
 * @param[in,out]   bufferA     pointer to buffer space for input
 * @param[in,out]   bufferB     pointer to buffer space for output
 * @return     The function returns <code>ARM_MATH_SUCCESS</code>
 *
 */
arm_status arm_convolve_HWC_q15_basic(const q15_t *Im_in,
                                      const uint16_t dim_im_in,
                                      const uint16_t ch_im_in,
                                      const q15_t *wt,
                                      const uint16_t ch_im_out,
                                      const uint16_t dim_kernel,
                                      const uint16_t padding,
                                      const uint16_t stride,
                                      const q15_t *bias,
                                      const uint16_t bias_shift,
                                      const uint16_t out_shift,
                                      q15_t *Im_out,
                                      const uint16_t dim_im_out,
                                      q15_t *bufferA,
                                      q7_t *bufferB);
/**
 * @brief Fast Q7 convolution function
 * @param[in]       Im_in       pointer to input tensor
 * @param[in]       dim_im_in   input tensor dimension
 * @param[in]       ch_im_in    number of input tensor channels
 * @param[in]       wt          pointer to kernel weights
 * @param[in]       ch_im_out   number of filters, i.e., output tensor channels
 * @param[in]       dim_kernel  filter kernel size
 * @param[in]       padding     padding sizes
 * @param[in]       stride      convolution stride
 * @param[in]       bias        pointer to bias
 * @param[in]       bias_shift  amount of left-shift for bias
 * @param[in]       out_shift   amount of right-shift for output
 * @param[in,out]   Im_out      pointer to output tensor
 * @param[in]       dim_im_out  output tensor dimension
 * @param[in,out]   bufferA     pointer to buffer space for input
 * @param[in,out]   bufferB     pointer to buffer space for output
 * @return     The function returns either
 *             <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size
 *             checking.
 *
 * This function is the version with the full list of optimization tricks, but with
 * some constraints:
 *   ch_im_in is a multiple of 4
 *   ch_im_out is a multiple of 2
 */
arm_status arm_convolve_HWC_q7_fast(const q7_t *Im_in,
                                    const uint16_t dim_im_in,
                                    const uint16_t ch_im_in,
                                    const q7_t *wt,
                                    const uint16_t ch_im_out,
                                    const uint16_t dim_kernel,
                                    const uint16_t padding,
                                    const uint16_t stride,
                                    const q7_t *bias,
                                    const uint16_t bias_shift,
                                    const uint16_t out_shift,
                                    q7_t *Im_out,
                                    const uint16_t dim_im_out,
                                    q15_t *bufferA,
                                    q7_t *bufferB);
/**
 * @brief Fast Q7 convolution function (non-square shape)
 * @param[in]       Im_in         pointer to input tensor
 * @param[in]       dim_im_in_x   input tensor dimension x
 * @param[in]       dim_im_in_y   input tensor dimension y
 * @param[in]       ch_im_in      number of input tensor channels
 * @param[in]       wt            pointer to kernel weights
 * @param[in]       ch_im_out     number of filters, i.e., output tensor channels
 * @param[in]       dim_kernel_x  filter kernel size x
 * @param[in]       dim_kernel_y  filter kernel size y
 * @param[in]       padding_x     padding size x
 * @param[in]       padding_y     padding size y
 * @param[in]       stride_x      convolution stride x
 * @param[in]       stride_y      convolution stride y
 * @param[in]       bias          pointer to bias
 * @param[in]       bias_shift    amount of left-shift for bias
 * @param[in]       out_shift     amount of right-shift for output
 * @param[in,out]   Im_out        pointer to output tensor
 * @param[in]       dim_im_out_x  output tensor dimension x
 * @param[in]       dim_im_out_y  output tensor dimension y
 * @param[in,out]   bufferA       pointer to buffer space for input
 * @param[in,out]   bufferB       pointer to buffer space for output
 * @return     The function returns either
 *             <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size
 *             checking.
 *
 * This function is the version with the full list of optimization tricks, but with
 * some constraints:
 *   ch_im_in is a multiple of 4
 *   ch_im_out is a multiple of 2
 */
arm_status arm_convolve_HWC_q7_fast_nonsquare(const q7_t *Im_in,
                                              const uint16_t dim_im_in_x,
                                              const uint16_t dim_im_in_y,
                                              const uint16_t ch_im_in,
                                              const q7_t *wt,
                                              const uint16_t ch_im_out,
                                              const uint16_t dim_kernel_x,
                                              const uint16_t dim_kernel_y,
                                              const uint16_t padding_x,
                                              const uint16_t padding_y,
                                              const uint16_t stride_x,
                                              const uint16_t stride_y,
                                              const q7_t *bias,
                                              const uint16_t bias_shift,
                                              const uint16_t out_shift,
                                              q7_t *Im_out,
                                              const uint16_t dim_im_out_x,
                                              const uint16_t dim_im_out_y,
                                              q15_t *bufferA,
                                              q7_t *bufferB);
/**
 * @brief Fast Q7 version of 1x1 convolution (non-square shape)
 * @param[in]       Im_in         pointer to input tensor
 * @param[in]       dim_im_in_x   input tensor dimension x
 * @param[in]       dim_im_in_y   input tensor dimension y
 * @param[in]       ch_im_in      number of input tensor channels
 * @param[in]       wt            pointer to kernel weights
 * @param[in]       ch_im_out     number of filters, i.e., output tensor channels
 * @param[in]       dim_kernel_x  filter kernel size x
 * @param[in]       dim_kernel_y  filter kernel size y
 * @param[in]       padding_x     padding size x
 * @param[in]       padding_y     padding size y
 * @param[in]       stride_x      convolution stride x
 * @param[in]       stride_y      convolution stride y
 * @param[in]       bias          pointer to bias
 * @param[in]       bias_shift    amount of left-shift for bias
 * @param[in]       out_shift     amount of right-shift for output
 * @param[in,out]   Im_out        pointer to output tensor
 * @param[in]       dim_im_out_x  output tensor dimension x
 * @param[in]       dim_im_out_y  output tensor dimension y
 * @param[in,out]   bufferA       pointer to buffer space for input
 * @param[in,out]   bufferB       pointer to buffer space for output
 * @return     The function returns either
 *             <code>ARM_MATH_SIZE_MISMATCH</code> if argument constraints fail or
 *             <code>ARM_MATH_SUCCESS</code> on successful completion.
 *
 * This function implements convolution with a 1x1 kernel size (i.e., dim_kernel_x=1
 * and dim_kernel_y=1). It can be used for
 * the second half of MobileNets after depthwise separable convolution.
 *
 * This function is the version with the full list of optimization tricks, but with
 * some constraints:
 *   ch_im_in is a multiple of 4
 *   ch_im_out is a multiple of 2
 */
arm_status arm_convolve_1x1_HWC_q7_fast_nonsquare(const q7_t *Im_in,
                                                  const uint16_t dim_im_in_x,
                                                  const uint16_t dim_im_in_y,
                                                  const uint16_t ch_im_in,
                                                  const q7_t *wt,
                                                  const uint16_t ch_im_out,
                                                  const uint16_t dim_kernel_x,
                                                  const uint16_t dim_kernel_y,
                                                  const uint16_t padding_x,
                                                  const uint16_t padding_y,
                                                  const uint16_t stride_x,
                                                  const uint16_t stride_y,
                                                  const q7_t *bias,
                                                  const uint16_t bias_shift,
                                                  const uint16_t out_shift,
                                                  q7_t *Im_out,
                                                  const uint16_t dim_im_out_x,
                                                  const uint16_t dim_im_out_y,
                                                  q15_t *bufferA,
                                                  q7_t *bufferB);
/**
 * @brief Fast s8 version for 1x1 convolution (non-square shape)
 *
 * @param[in, out] ctx            Function context that contains the additional buffer if required by the function.
 *                                arm_convolve_1x1_s8_fast_get_buffer_size will return the buffer_size if required
 * @param[in]      conv_params    Convolution parameters (e.g. strides, dilations, pads,...).
 *                                Range of conv_params->input_offset  : [-127, 128]
 *                                Range of conv_params->output_offset : [-128, 127]
 * @param[in]      quant_params   Per-channel quantization info.
 *                                It contains the multiplier and shift values to be applied to each output channel
 * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
 * @param[in]      input_data     Input (activation) data pointer. Data type: int8
 * @param[in]      filter_dims    Filter tensor dimensions. Format: [C_OUT, 1, 1, C_IN]
 * @param[in]      filter_data    Filter data pointer. Data type: int8
 * @param[in]      bias_dims      Bias tensor dimensions. Format: [C_OUT]
 * @param[in]      bias_data      Optional bias data pointer. Data type: int32
 * @param[in]      output_dims    Output tensor dimensions. Format: [N, H, W, C_OUT]
 * @param[out]     output_data    Output data pointer. Data type: int8
 *
 * @return     The function returns either
 *             <code>ARM_MATH_SIZE_MISMATCH</code> if argument constraints fail or
 *             <code>ARM_MATH_SUCCESS</code> on successful completion.
 *
 * @details
 *   - Supported framework : TensorFlow Lite Micro
 *   - The following constraints on the arguments apply
 *      -# input_dims->c is a multiple of 4
 *      -# conv_params->padding.w = conv_params->padding.h = 0
 *      -# conv_params->stride.w = conv_params->stride.h = 1
 *
 */
arm_status arm_convolve_1x1_s8_fast(const cmsis_nn_context *ctx,
                                    const cmsis_nn_conv_params *conv_params,
                                    const cmsis_nn_per_channel_quant_params *quant_params,
                                    const cmsis_nn_dims *input_dims,
                                    const q7_t *input_data,
                                    const cmsis_nn_dims *filter_dims,
                                    const q7_t *filter_data,
                                    const cmsis_nn_dims *bias_dims,
                                    const int32_t *bias_data,
                                    const cmsis_nn_dims *output_dims,
                                    q7_t *output_data);

/**
 * @brief Get the required buffer size for arm_convolve_1x1_s8_fast
 *
 * @param[in]    input_dims    Input (activation) dimensions
 * @return       The function returns the required buffer size in bytes
 *
 */
int32_t arm_convolve_1x1_s8_fast_get_buffer_size(const cmsis_nn_dims *input_dims);
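
/*
 * Usage sketch (editor's illustration): the fast 1x1 kernel is only valid
 * when the constraints above hold, so a caller typically checks them and
 * falls back to arm_convolve_s8() otherwise. Names are hypothetical and
 * <stdbool.h> is assumed.
 *
 *   const bool fast_1x1_ok = (input_dims.c % 4 == 0) &&
 *                            (conv_params.padding.w == 0) && (conv_params.padding.h == 0) &&
 *                            (conv_params.stride.w == 1) && (conv_params.stride.h == 1);
 *   if (fast_1x1_ok) {
 *       ctx.size = arm_convolve_1x1_s8_fast_get_buffer_size(&input_dims);
 *       status = arm_convolve_1x1_s8_fast(&ctx, &conv_params, &quant_params, &input_dims, input_data,
 *                                         &filter_dims, filter_data, &bias_dims, bias_data,
 *                                         &output_dims, output_data);
 *   }
 */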
/**
 * @brief 1xn convolution
 *
 * @param[in, out] ctx            Function context that contains the additional buffer if required by the function.
 *                                arm_convolve_1_x_n_s8_get_buffer_size will return the buffer_size if required
 * @param[in]      conv_params    Convolution parameters (e.g. strides, dilations, pads,...).
 *                                Range of conv_params->input_offset  : [-127, 128]
 *                                Range of conv_params->output_offset : [-128, 127]
 * @param[in]      quant_params   Per-channel quantization info.
 *                                It contains the multiplier and shift values to be applied to each output channel
 * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
 * @param[in]      input_data     Input (activation) data pointer. Data type: int8
 * @param[in]      filter_dims    Filter tensor dimensions. Format: [C_OUT, 1, WK, C_IN] where WK is the horizontal
 *                                spatial filter dimension
 * @param[in]      filter_data    Filter data pointer. Data type: int8
 * @param[in]      bias_dims      Bias tensor dimensions. Format: [C_OUT]
 * @param[in]      bias_data      Optional bias data pointer. Data type: int32
 * @param[in]      output_dims    Output tensor dimensions. Format: [N, H, W, C_OUT]
 * @param[out]     output_data    Output data pointer. Data type: int8
 *
 * @return     The function returns either
 *             <code>ARM_MATH_SIZE_MISMATCH</code> if argument constraints fail or
 *             <code>ARM_MATH_SUCCESS</code> on successful completion.
 *
 * @details
 *   - Supported framework : TensorFlow Lite Micro
 *   - The following constraints on the arguments apply
 *      -# input_dims->n equals 1
 *      -# output_dims->w is a multiple of 4
 *      -# Explicit constraints (since it is for 1xN convolution)
 *         -## input_dims->h equals 1
 *         -## output_dims->h equals 1
 *         -## filter_dims->h equals 1
 * @todo Remove constraint on output_dims->w to make the function generic.
 *
 */
arm_status arm_convolve_1_x_n_s8(const cmsis_nn_context *ctx,
                                 const cmsis_nn_conv_params *conv_params,
                                 const cmsis_nn_per_channel_quant_params *quant_params,
                                 const cmsis_nn_dims *input_dims,
                                 const q7_t *input_data,
                                 const cmsis_nn_dims *filter_dims,
                                 const q7_t *filter_data,
                                 const cmsis_nn_dims *bias_dims,
                                 const int32_t *bias_data,
                                 const cmsis_nn_dims *output_dims,
                                 q7_t *output_data);

/**
 * @brief Get the required additional buffer size for 1xn convolution
 *
 * @param[in]    input_dims     Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
 * @param[in]    filter_dims    Filter tensor dimensions. Format: [C_OUT, 1, WK, C_IN] where WK is the
 *                              horizontal spatial filter dimension
 * @return       The function returns the required buffer size (bytes)
 *
 */
int32_t arm_convolve_1_x_n_s8_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims);
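
/*
 * Usage sketch (editor's illustration): a caller guards the 1xN kernel on the
 * constraints listed above before dispatching to it. Names are hypothetical.
 *
 *   const bool one_x_n_ok = (input_dims.n == 1) && (input_dims.h == 1) &&
 *                           (filter_dims.h == 1) && (output_dims.h == 1) &&
 *                           (output_dims.w % 4 == 0);
 *   if (one_x_n_ok) {
 *       ctx.size = arm_convolve_1_x_n_s8_get_buffer_size(&input_dims, &filter_dims);
 *       status = arm_convolve_1_x_n_s8(&ctx, &conv_params, &quant_params, &input_dims, input_data,
 *                                      &filter_dims, filter_data, &bias_dims, bias_data,
 *                                      &output_dims, output_data);
 *   }
 */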
/**
 * @brief Q7 version of convolution for RGB image
 * @param[in]       Im_in       pointer to input tensor
 * @param[in]       dim_im_in   input tensor dimension
 * @param[in]       ch_im_in    number of input tensor channels
 * @param[in]       wt          pointer to kernel weights
 * @param[in]       ch_im_out   number of filters, i.e., output tensor channels
 * @param[in]       dim_kernel  filter kernel size
 * @param[in]       padding     padding sizes
 * @param[in]       stride      convolution stride
 * @param[in]       bias        pointer to bias
 * @param[in]       bias_shift  amount of left-shift for bias
 * @param[in]       out_shift   amount of right-shift for output
 * @param[in,out]   Im_out      pointer to output tensor
 * @param[in]       dim_im_out  output tensor dimension
 * @param[in,out]   bufferA     pointer to buffer space for input
 * @param[in,out]   bufferB     pointer to buffer space for output
 * @return     The function returns either
 *             <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size
 *             checking.
 *
 * This kernel is written exclusively for convolution with ch_im_in
 * equal to 3. This applies to the first layer of CNNs, which has an
 * input image in RGB format.
 */
arm_status arm_convolve_HWC_q7_RGB(const q7_t *Im_in,
                                   const uint16_t dim_im_in,
                                   const uint16_t ch_im_in,
                                   const q7_t *wt,
                                   const uint16_t ch_im_out,
                                   const uint16_t dim_kernel,
                                   const uint16_t padding,
                                   const uint16_t stride,
                                   const q7_t *bias,
                                   const uint16_t bias_shift,
                                   const uint16_t out_shift,
                                   q7_t *Im_out,
                                   const uint16_t dim_im_out,
                                   q15_t *bufferA,
                                   q7_t *bufferB);
/**
 * @brief Fast Q15 convolution function
 * @param[in]       Im_in       pointer to input tensor
 * @param[in]       dim_im_in   input tensor dimension
 * @param[in]       ch_im_in    number of input tensor channels
 * @param[in]       wt          pointer to kernel weights
 * @param[in]       ch_im_out   number of filters, i.e., output tensor channels
 * @param[in]       dim_kernel  filter kernel size
 * @param[in]       padding     padding sizes
 * @param[in]       stride      convolution stride
 * @param[in]       bias        pointer to bias
 * @param[in]       bias_shift  amount of left-shift for bias
 * @param[in]       out_shift   amount of right-shift for output
 * @param[in,out]   Im_out      pointer to output tensor
 * @param[in]       dim_im_out  output tensor dimension
 * @param[in,out]   bufferA     pointer to buffer space for input
 * @param[in,out]   bufferB     pointer to buffer space for output
 * @return     The function returns either
 *             <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size
 *             checking.
 *
 * This function is the version with the full list of optimization tricks, but with
 * some constraints:
 *   ch_im_in is a multiple of 2
 *   ch_im_out is a multiple of 2
 *   dim_im_out is a multiple of 2
 */
arm_status arm_convolve_HWC_q15_fast(const q15_t *Im_in,
                                     const uint16_t dim_im_in,
                                     const uint16_t ch_im_in,
                                     const q15_t *wt,
                                     const uint16_t ch_im_out,
                                     const uint16_t dim_kernel,
                                     const uint16_t padding,
                                     const uint16_t stride,
                                     const q15_t *bias,
                                     const uint16_t bias_shift,
                                     const uint16_t out_shift,
                                     q15_t *Im_out,
                                     const uint16_t dim_im_out,
                                     q15_t *bufferA,
                                     q7_t *bufferB);
/**
 * @brief Fast Q15 convolution function (non-square shape)
 * @param[in]       Im_in         pointer to input tensor
 * @param[in]       dim_im_in_x   input tensor dimension x
 * @param[in]       dim_im_in_y   input tensor dimension y
 * @param[in]       ch_im_in      number of input tensor channels
 * @param[in]       wt            pointer to kernel weights
 * @param[in]       ch_im_out     number of filters, i.e., output tensor channels
 * @param[in]       dim_kernel_x  filter kernel size x
 * @param[in]       dim_kernel_y  filter kernel size y
 * @param[in]       padding_x     padding size x
 * @param[in]       padding_y     padding size y
 * @param[in]       stride_x      convolution stride x
 * @param[in]       stride_y      convolution stride y
 * @param[in]       bias          pointer to bias
 * @param[in]       bias_shift    amount of left-shift for bias
 * @param[in]       out_shift     amount of right-shift for output
 * @param[in,out]   Im_out        pointer to output tensor
 * @param[in]       dim_im_out_x  output tensor dimension x
 * @param[in]       dim_im_out_y  output tensor dimension y
 * @param[in,out]   bufferA       pointer to buffer space for input
 * @param[in,out]   bufferB       pointer to buffer space for output
 * @return     The function returns either
 *             <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size
 *             checking.
 *
 * @details
 *
 * <b>Buffer size:</b>
 *
 * bufferA size: 2*ch_im_in*dim_kernel*dim_kernel
 *
 * bufferB size: 0
 *
 * <b>Input dimension constraints:</b>
 *
 * ch_im_in is a multiple of 2
 *
 * ch_im_out is a multiple of 2
 *
 */
arm_status arm_convolve_HWC_q15_fast_nonsquare(const q15_t *Im_in,
                                               const uint16_t dim_im_in_x,
                                               const uint16_t dim_im_in_y,
                                               const uint16_t ch_im_in,
                                               const q15_t *wt,
                                               const uint16_t ch_im_out,
                                               const uint16_t dim_kernel_x,
                                               const uint16_t dim_kernel_y,
                                               const uint16_t padding_x,
                                               const uint16_t padding_y,
                                               const uint16_t stride_x,
                                               const uint16_t stride_y,
                                               const q15_t *bias,
                                               const uint16_t bias_shift,
                                               const uint16_t out_shift,
                                               q15_t *Im_out,
                                               const uint16_t dim_im_out_x,
                                               const uint16_t dim_im_out_y,
                                               q15_t *bufferA,
                                               q7_t *bufferB);
/**
 * @brief Q7 depthwise separable convolution function
 * @param[in]       Im_in       pointer to input tensor
 * @param[in]       dim_im_in   input tensor dimension
 * @param[in]       ch_im_in    number of input tensor channels
 * @param[in]       wt          pointer to kernel weights
 * @param[in]       ch_im_out   number of filters, i.e., output tensor channels
 * @param[in]       dim_kernel  filter kernel size
 * @param[in]       padding     padding sizes
 * @param[in]       stride      convolution stride
 * @param[in]       bias        pointer to bias
 * @param[in]       bias_shift  amount of left-shift for bias
 * @param[in]       out_shift   amount of right-shift for output
 * @param[in,out]   Im_out      pointer to output tensor
 * @param[in]       dim_im_out  output tensor dimension
 * @param[in,out]   bufferA     pointer to buffer space for input
 * @param[in,out]   bufferB     pointer to buffer space for output
 * @return     The function returns either
 *             <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size
 *             checking.
 *
 * This function is the version with the full list of optimization tricks, but with
 * some constraints:
 *   ch_im_in is a multiple of 2
 *   ch_im_out is a multiple of 2
 */
arm_status arm_depthwise_separable_conv_HWC_q7(const q7_t *Im_in,
                                               const uint16_t dim_im_in,
                                               const uint16_t ch_im_in,
                                               const q7_t *wt,
                                               const uint16_t ch_im_out,
                                               const uint16_t dim_kernel,
                                               const uint16_t padding,
                                               const uint16_t stride,
                                               const q7_t *bias,
                                               const uint16_t bias_shift,
                                               const uint16_t out_shift,
                                               q7_t *Im_out,
                                               const uint16_t dim_im_out,
                                               q15_t *bufferA,
                                               q7_t *bufferB);
/**
 * @brief Q7 depthwise separable convolution function (non-square shape)
 * @param[in]       Im_in         pointer to input tensor
 * @param[in]       dim_im_in_x   input tensor dimension x
 * @param[in]       dim_im_in_y   input tensor dimension y
 * @param[in]       ch_im_in      number of input tensor channels
 * @param[in]       wt            pointer to kernel weights
 * @param[in]       ch_im_out     number of filters, i.e., output tensor channels
 * @param[in]       dim_kernel_x  filter kernel size x
 * @param[in]       dim_kernel_y  filter kernel size y
 * @param[in]       padding_x     padding size x
 * @param[in]       padding_y     padding size y
 * @param[in]       stride_x      convolution stride x
 * @param[in]       stride_y      convolution stride y
 * @param[in]       bias          pointer to bias
 * @param[in]       bias_shift    amount of left-shift for bias
 * @param[in]       out_shift     amount of right-shift for output
 * @param[in,out]   Im_out        pointer to output tensor
 * @param[in]       dim_im_out_x  output tensor dimension x
 * @param[in]       dim_im_out_y  output tensor dimension y
 * @param[in,out]   bufferA       pointer to buffer space for input
 * @param[in,out]   bufferB       pointer to buffer space for output
 * @return     The function returns either
 *             <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size
 *             checking.
 *
 * This function is the version with the full list of optimization tricks, but with
 * some constraints:
 *   ch_im_in is a multiple of 2
 *   ch_im_out is a multiple of 2
 */
arm_status arm_depthwise_separable_conv_HWC_q7_nonsquare(const q7_t *Im_in,
                                                         const uint16_t dim_im_in_x,
                                                         const uint16_t dim_im_in_y,
                                                         const uint16_t ch_im_in,
                                                         const q7_t *wt,
                                                         const uint16_t ch_im_out,
                                                         const uint16_t dim_kernel_x,
                                                         const uint16_t dim_kernel_y,
                                                         const uint16_t padding_x,
                                                         const uint16_t padding_y,
                                                         const uint16_t stride_x,
                                                         const uint16_t stride_y,
                                                         const q7_t *bias,
                                                         const uint16_t bias_shift,
                                                         const uint16_t out_shift,
                                                         q7_t *Im_out,
                                                         const uint16_t dim_im_out_x,
                                                         const uint16_t dim_im_out_y,
                                                         q15_t *bufferA,
                                                         q7_t *bufferB);
/**
 * @brief Wrapper function to pick the right optimized s8 depthwise convolution function
 *
 * @param[in, out] ctx            Function context (e.g. temporary buffer). Check the function
 *                                definition file to see if an additional buffer is required.
 *                                Optional function {API}_get_buffer_size() provides the buffer
 *                                size if required.
 * @param[in]      dw_conv_params Depthwise convolution parameters (e.g. strides, dilations, pads,...)
 *                                dw_conv_params->dilation is not used.
 *                                Range of dw_conv_params->input_offset : [-127, 128]
 *                                Range of dw_conv_params->output_offset : [-128, 127]
 * @param[in]      quant_params   Per-channel quantization info.
 *                                It contains the multiplier and shift values to be applied to each
 *                                output channel
 * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [H, W, C_IN]
 *                                Batch argument N is not used and assumed to be 1.
 * @param[in]      input_data     Input (activation) data pointer. Data type: int8
 * @param[in]      filter_dims    Filter tensor dimensions. Format: [1, H, W, C_OUT]
 * @param[in]      filter_data    Filter data pointer. Data type: int8
 * @param[in]      bias_dims      Bias tensor dimensions. Format: [C_OUT]
 * @param[in]      bias_data      Bias data pointer. Data type: int32
 * @param[in]      output_dims    Output tensor dimensions. Format: [1, H, W, C_OUT]
 * @param[in, out] output_data    Output data pointer. Data type: int8
 * @return     The function returns
 *             <code>ARM_MATH_SUCCESS</code> - Successful completion.
 *
 * @details
 *    - Supported framework: TensorFlow Lite
 *    - Picks one of the following functions
 *        -# arm_depthwise_conv_s8()
 *        -# arm_depthwise_conv_3x3_s8() - Cortex-M CPUs with DSP extension only
 *        -# arm_depthwise_conv_s8_opt()
 *    - q7 is used as the data type even though it is s8 data; this is done to be consistent with existing APIs.
 *    - Check the details of arm_depthwise_conv_s8_opt() for potential data that can be accessed outside of the
 *      boundary.
 */
arm_status arm_depthwise_conv_wrapper_s8(const cmsis_nn_context *ctx,
                                         const cmsis_nn_dw_conv_params *dw_conv_params,
                                         const cmsis_nn_per_channel_quant_params *quant_params,
                                         const cmsis_nn_dims *input_dims,
                                         const q7_t *input_data,
                                         const cmsis_nn_dims *filter_dims,
                                         const q7_t *filter_data,
                                         const cmsis_nn_dims *bias_dims,
                                         const int32_t *bias_data,
                                         const cmsis_nn_dims *output_dims,
                                         q7_t *output_data);
/**
 * @brief Get size of additional buffer required by arm_depthwise_conv_wrapper_s8()
 *
 * @param[in]      dw_conv_params Depthwise convolution parameters (e.g. strides, dilations, pads,...)
 *                                dw_conv_params->dilation is not used.
 *                                Range of dw_conv_params->input_offset : [-127, 128]
 *                                Range of dw_conv_params->output_offset : [-128, 127]
 * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [H, W, C_IN]
 *                                Batch argument N is not used and assumed to be 1.
 * @param[in]      filter_dims    Filter tensor dimensions. Format: [1, H, W, C_OUT]
 * @param[in]      output_dims    Output tensor dimensions. Format: [1, H, W, C_OUT]
 * @return         Size of additional memory required for optimizations in bytes.
 *
 */
int32_t arm_depthwise_conv_wrapper_s8_get_buffer_size(const cmsis_nn_dw_conv_params *dw_conv_params,
                                                      const cmsis_nn_dims *input_dims,
                                                      const cmsis_nn_dims *filter_dims,
                                                      const cmsis_nn_dims *output_dims);
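
/*
 * Usage sketch (editor's illustration, not part of the original header):
 * pairing the wrapper with its get_buffer_size() helper. The heap-based
 * allocation and the pre-filled parameter structs are assumptions for
 * illustration; a real deployment often uses a static arena instead.
 *
 *   cmsis_nn_context ctx;
 *   ctx.size = arm_depthwise_conv_wrapper_s8_get_buffer_size(&dw_conv_params,
 *                                                            &input_dims,
 *                                                            &filter_dims,
 *                                                            &output_dims);
 *   ctx.buf = (ctx.size > 0) ? malloc(ctx.size) : NULL;   // assumes <stdlib.h>
 *
 *   arm_status status = arm_depthwise_conv_wrapper_s8(&ctx, &dw_conv_params,
 *                                                     &quant_params,
 *                                                     &input_dims, input_data,
 *                                                     &filter_dims, filter_data,
 *                                                     &bias_dims, bias_data,
 *                                                     &output_dims, output_data);
 *   free(ctx.buf);
 */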
/**
 * @brief Basic s8 depthwise convolution function that doesn't have any constraints on the input dimensions.
 *
 * @param[in, out] ctx            Function context (e.g. temporary buffer). Check the function
 *                                definition file to see if an additional buffer is required.
 *                                Optional function {API}_get_buffer_size() provides the buffer
 *                                size if an additional buffer is required.
 * @param[in]      dw_conv_params Depthwise convolution parameters (e.g. strides, dilations, pads,...)
 *                                dw_conv_params->dilation is not used.
 *                                Range of dw_conv_params->input_offset : [-127, 128]
 *                                Range of dw_conv_params->output_offset : [-128, 127]
 * @param[in]      quant_params   Per-channel quantization info.
 *                                It contains the multiplier and shift values to be applied to each
 *                                output channel
 * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
 *                                Batch argument N is not used.
 * @param[in]      input_data     Input (activation) data pointer. Data type: int8
 * @param[in]      filter_dims    Filter tensor dimensions. Format: [1, H, W, C_OUT]
 * @param[in]      filter_data    Filter data pointer. Data type: int8
 * @param[in]      bias_dims      Bias tensor dimensions. Format: [C_OUT]
 * @param[in]      bias_data      Bias data pointer. Data type: int32
 * @param[in]      output_dims    Output tensor dimensions. Format: [N, H, W, C_OUT]
 * @param[in, out] output_data    Output data pointer. Data type: int8
 * @return     The function returns <code>ARM_MATH_SUCCESS</code>
 *
 * @details
 *    - Supported framework: TensorFlow Lite
 *    - q7 is used as the data type even though it is s8 data; this is done to be consistent with existing APIs.
 */
arm_status arm_depthwise_conv_s8(const cmsis_nn_context *ctx,
                                 const cmsis_nn_dw_conv_params *dw_conv_params,
                                 const cmsis_nn_per_channel_quant_params *quant_params,
                                 const cmsis_nn_dims *input_dims,
                                 const q7_t *input_data,
                                 const cmsis_nn_dims *filter_dims,
                                 const q7_t *filter_data,
                                 const cmsis_nn_dims *bias_dims,
                                 const int32_t *bias_data,
                                 const cmsis_nn_dims *output_dims,
                                 q7_t *output_data);
/**
 * @brief Basic s16 depthwise convolution function that doesn't have any constraints on the input dimensions.
 *
 * @param[in, out] ctx            Function context (e.g. temporary buffer). Check the function
 *                                definition file to see if an additional buffer is required.
 *                                Optional function {API}_get_buffer_size() provides the buffer
 *                                size if an additional buffer is required.
 * @param[in]      dw_conv_params Depthwise convolution parameters (e.g. strides, dilations, pads,...)
 *                                conv_params->input_offset : Not used
 *                                conv_params->output_offset : Not used
 * @param[in]      quant_params   Per-channel quantization info.
 *                                It contains the multiplier and shift values to be applied to each
 *                                output channel
 * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
 *                                Batch argument N is not used.
 * @param[in]      input_data     Input (activation) data pointer. Data type: int16
 * @param[in]      filter_dims    Filter tensor dimensions. Format: [1, H, W, C_OUT]
 * @param[in]      filter_data    Filter data pointer. Data type: int8
 * @param[in]      bias_dims      Bias tensor dimensions. Format: [C_OUT]
 * @param[in]      bias_data      Bias data pointer. Data type: int64
 * @param[in]      output_dims    Output tensor dimensions. Format: [N, H, W, C_OUT]
 * @param[in, out] output_data    Output data pointer. Data type: int16
 * @return     The function returns <code>ARM_MATH_SUCCESS</code>
 *
 * @details
 *    - Supported framework: TensorFlow Lite
 *    - q15 is used as the data type even though it is s16 data; this is done to be consistent with existing APIs.
 */
arm_status arm_depthwise_conv_s16(const cmsis_nn_context *ctx,
                                  const cmsis_nn_dw_conv_params *dw_conv_params,
                                  const cmsis_nn_per_channel_quant_params *quant_params,
                                  const cmsis_nn_dims *input_dims,
                                  const q15_t *input_data,
                                  const cmsis_nn_dims *filter_dims,
                                  const q7_t *filter_data,
                                  const cmsis_nn_dims *bias_dims,
                                  const int64_t *bias_data,
                                  const cmsis_nn_dims *output_dims,
                                  q15_t *output_data);
/**
 * @brief Optimized s8 depthwise convolution function for 3x3 kernel size with some constraints on
 *        the input arguments (documented below). Refer to arm_depthwise_conv_s8() for function
 *        argument details.
 *
 * @return     The function returns one of the following
 *                <code>ARM_MATH_SIZE_MISMATCH</code> - Unsupported dimension of tensors
 *                <code>ARM_MATH_ARGUMENT_ERROR</code> - Unsupported pad size along the x axis
 *                <code>ARM_MATH_SUCCESS</code> - Successful operation
 *
 * @details
 *   - Supported framework : TensorFlow Lite Micro
 *   - The following constraints on the arguments apply
 *      -# Number of input channels equals number of output channels
 *      -# Filter height and width equal 3
 *      -# Padding along x is either 0 or 1.
 *
 */
arm_status arm_depthwise_conv_3x3_s8(const cmsis_nn_context *ctx,
                                     const cmsis_nn_dw_conv_params *dw_conv_params,
                                     const cmsis_nn_per_channel_quant_params *quant_params,
                                     const cmsis_nn_dims *input_dims,
                                     const q7_t *input_data,
                                     const cmsis_nn_dims *filter_dims,
                                     const q7_t *filter_data,
                                     const cmsis_nn_dims *bias_dims,
                                     const int32_t *bias_data,
                                     const cmsis_nn_dims *output_dims,
                                     q7_t *output_data);
/**
 * @brief Optimized s8 depthwise convolution function with constraint that in_channel equals out_channel.
 *        Refer to arm_depthwise_conv_s8() for function argument details.
 *
 * @return     The function returns one of the following
 *                <code>ARM_MATH_SIZE_MISMATCH</code> - input channel != output channel or
 *                                                      ch_mult != 1
 *                <code>ARM_MATH_SUCCESS</code> - Successful operation
 *
 * @note       If the number of channels is not a multiple of 4, up to 3 elements outside the boundary will be
 *             read for the following if MVE optimizations (Arm Helium Technology) are used.
 *               - Output shift
 *               - Output multiplier
 *               - Output bias
 *               - Kernel
 * @details
 *    - Supported framework: TensorFlow Lite
 *    - The following constraint on the arguments applies
 *        -# Number of input channels equals number of output channels, or ch_mult equals 1
 *    - q7 is used as the data type even though it is s8 data; this is done to be consistent with existing APIs.
 *    - Recommended when the number of channels is 4 or greater.
 *
 */
arm_status arm_depthwise_conv_s8_opt(const cmsis_nn_context *ctx,
                                     const cmsis_nn_dw_conv_params *dw_conv_params,
                                     const cmsis_nn_per_channel_quant_params *quant_params,
                                     const cmsis_nn_dims *input_dims,
                                     const q7_t *input_data,
                                     const cmsis_nn_dims *filter_dims,
                                     const q7_t *filter_data,
                                     const cmsis_nn_dims *bias_dims,
                                     const int32_t *bias_data,
                                     const cmsis_nn_dims *output_dims,
                                     q7_t *output_data);
/**
 * @brief Get the required buffer size for optimized s8 depthwise convolution
 *        function with constraint that in_channel equals out_channel.
 * @param[in]       input_dims   Input (activation) tensor dimensions. Format: [1, H, W, C_IN]
 *                               Batch argument N is not used.
 * @param[in]       filter_dims  Filter tensor dimensions. Format: [1, H, W, C_OUT]
 * @return          The function returns required buffer size in bytes
 *
 */
int32_t arm_depthwise_conv_s8_opt_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims);
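
/*
 * Usage sketch (editor's illustration, not part of the original header): the
 * optimized kernel paired with its buffer helper. The malloc-based allocation
 * is an assumption; the dims/params structs are assumed to be filled in
 * beforehand with in_channel == out_channel.
 *
 *   cmsis_nn_context ctx;
 *   ctx.size = arm_depthwise_conv_s8_opt_get_buffer_size(&input_dims, &filter_dims);
 *   ctx.buf  = (ctx.size > 0) ? malloc(ctx.size) : NULL;
 *
 *   arm_status status = arm_depthwise_conv_s8_opt(&ctx, &dw_conv_params, &quant_params,
 *                                                 &input_dims, input_data,
 *                                                 &filter_dims, filter_data,
 *                                                 &bias_dims, bias_data,
 *                                                 &output_dims, output_data);
 *   free(ctx.buf);
 */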
/**
 * @defgroup FC Fully-connected Layer Functions
 *
 * Collection of fully-connected and matrix multiplication functions.
 *
 * A fully-connected layer is basically a matrix-vector multiplication
 * with bias. The matrix is the weights and the input/output vectors
 * are the activation values. Supported {weight, activation} precisions
 * include {8-bit, 8-bit}, {16-bit, 16-bit}, and {8-bit, 16-bit}.
 *
 * There are two types of kernel functions: the basic functions
 * implement the layer using a regular GEMV approach, while the opt
 * functions operate on weights in an interleaved format.
 *
 */
/**
 * @brief Q7 basic fully-connected layer function
 * @param[in]       pV          pointer to input vector
 * @param[in]       pM          pointer to matrix weights
 * @param[in]       dim_vec     length of the vector
 * @param[in]       num_of_rows number of rows in weight matrix
 * @param[in]       bias_shift  amount of left-shift for bias
 * @param[in]       out_shift   amount of right-shift for output
 * @param[in]       bias        pointer to bias
 * @param[in,out]   pOut        pointer to output vector
 * @param[in,out]   vec_buffer  pointer to buffer space for input
 * @return     The function returns <code>ARM_MATH_SUCCESS</code>
 *
 */
arm_status arm_fully_connected_q7(const q7_t *pV,
                                  const q7_t *pM,
                                  const uint16_t dim_vec,
                                  const uint16_t num_of_rows,
                                  const uint16_t bias_shift,
                                  const uint16_t out_shift,
                                  const q7_t *bias,
                                  q7_t *pOut,
                                  q15_t *vec_buffer);
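
/*
 * Usage sketch (editor's illustration, not part of the original header): the
 * legacy Q7 fully-connected call. The dimensions and shift values are
 * assumptions; vec_buffer is assumed to hold dim_vec q15 elements, which is
 * the buffer convention of these legacy kernels.
 *
 *   #define DIM_VEC  128
 *   #define NUM_ROWS 10
 *
 *   static q7_t  in_vec[DIM_VEC];
 *   static q7_t  weights[NUM_ROWS * DIM_VEC];
 *   static q7_t  bias[NUM_ROWS];
 *   static q7_t  out_vec[NUM_ROWS];
 *   static q15_t vec_buf[DIM_VEC];
 *
 *   arm_fully_connected_q7(in_vec, weights, DIM_VEC, NUM_ROWS,
 *                          0, 7,             // bias_shift, out_shift (assumed)
 *                          bias, out_vec, vec_buf);
 */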
/**
 * @brief Basic s8 Fully Connected function.
 *
 * @param[in, out] ctx           Function context (e.g. temporary buffer). Check the function
 *                               definition file to see if an additional buffer is required.
 *                               Optional function {API}_get_buffer_size() provides the buffer
 *                               size if an additional buffer is required.
 * @param[in]      fc_params     Fully Connected layer parameters.
 *                               Range of fc_params->input_offset : [-127, 128]
 *                               fc_params->filter_offset : 0
 *                               Range of fc_params->output_offset : [-128, 127]
 * @param[in]      quant_params  Per-tensor quantization info.
 *                               It contains the multiplier and shift values to be applied to the output tensor.
 * @param[in]      input_dims    Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
 *                               Input dimension is taken as Nx(H * W * C_IN)
 * @param[in]      input_data    Input (activation) data pointer. Data type: int8
 * @param[in]      filter_dims   Two dimensional filter dimensions. Format: [N, C]
 *                               N : accumulation depth and equals (H * W * C_IN) from input_dims
 *                               C : output depth and equals C_OUT in output_dims
 *                               H & W : Not used
 * @param[in]      filter_data   Filter data pointer. Data type: int8
 * @param[in]      bias_dims     Bias tensor dimensions. Format: [C_OUT]
 *                               N, H, W : Not used
 * @param[in]      bias_data     Bias data pointer. Data type: int32
 * @param[in]      output_dims   Output tensor dimensions. Format: [N, C_OUT]
 *                               N : Batches
 *                               C_OUT : Output depth
 *                               H & W : Not used.
 * @param[in, out] output_data   Output data pointer. Data type: int8
 * @return     The function returns <code>ARM_MATH_SUCCESS</code>
 *
 * @details
 *    - Supported framework: TensorFlow Lite
 *    - q7 is used as the data type even though it is s8 data; this is done to be consistent with existing APIs.
 */
arm_status arm_fully_connected_s8(const cmsis_nn_context *ctx,
                                  const cmsis_nn_fc_params *fc_params,
                                  const cmsis_nn_per_tensor_quant_params *quant_params,
                                  const cmsis_nn_dims *input_dims,
                                  const q7_t *input_data,
                                  const cmsis_nn_dims *filter_dims,
                                  const q7_t *filter_data,
                                  const cmsis_nn_dims *bias_dims,
                                  const int32_t *bias_data,
                                  const cmsis_nn_dims *output_dims,
                                  q7_t *output_data);
/**
 * @brief Get the required buffer size for S8 basic fully-connected and
 *        matrix multiplication layer function for TF Lite
 * @param[in]      filter_dims   dimension of filter
 * @return         The function returns required buffer size in bytes
 *
 */
int32_t arm_fully_connected_s8_get_buffer_size(const cmsis_nn_dims *filter_dims);
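
/*
 * Usage sketch (editor's illustration, not part of the original header): s8
 * fully-connected with its buffer helper. The offsets, activation limits and
 * heap allocation below are assumptions for illustration.
 *
 *   cmsis_nn_context ctx;
 *   ctx.size = arm_fully_connected_s8_get_buffer_size(&filter_dims);
 *   ctx.buf  = (ctx.size > 0) ? malloc(ctx.size) : NULL;
 *
 *   cmsis_nn_fc_params fc_params;
 *   fc_params.input_offset   = 0;      // assumed; range is [-127, 128]
 *   fc_params.filter_offset  = 0;      // must be 0, per the documentation above
 *   fc_params.output_offset  = 0;      // assumed; range is [-128, 127]
 *   fc_params.activation.min = -128;
 *   fc_params.activation.max = 127;
 *
 *   arm_status status = arm_fully_connected_s8(&ctx, &fc_params, &quant_params,
 *                                              &input_dims, input_data,
 *                                              &filter_dims, filter_data,
 *                                              &bias_dims, bias_data,
 *                                              &output_dims, output_data);
 *   free(ctx.buf);
 */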
/**
 * @brief Basic s16 Fully Connected function.
 *
 * @param[in, out] ctx           Function context (e.g. temporary buffer). Check the function
 *                               definition file to see if an additional buffer is required.
 *                               Optional function {API}_get_buffer_size() provides the buffer
 *                               size if an additional buffer is required.
 * @param[in]      fc_params     Fully Connected layer parameters.
 *                               fc_params->input_offset : 0
 *                               fc_params->filter_offset : 0
 *                               fc_params->output_offset : 0
 * @param[in]      quant_params  Per-tensor quantization info.
 *                               It contains the multiplier and shift values to be applied to the output tensor.
 * @param[in]      input_dims    Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
 *                               Input dimension is taken as Nx(H * W * C_IN)
 * @param[in]      input_data    Input (activation) data pointer. Data type: int16
 * @param[in]      filter_dims   Two dimensional filter dimensions. Format: [N, C]
 *                               N : accumulation depth and equals (H * W * C_IN) from input_dims
 *                               C : output depth and equals C_OUT in output_dims
 *                               H & W : Not used
 * @param[in]      filter_data   Filter data pointer. Data type: int8
 * @param[in]      bias_dims     Bias tensor dimensions. Format: [C_OUT]
 *                               N, H, W : Not used
 * @param[in]      bias_data     Bias data pointer. Data type: int64
 * @param[in]      output_dims   Output tensor dimensions. Format: [N, C_OUT]
 *                               N : Batches
 *                               C_OUT : Output depth
 *                               H & W : Not used.
 * @param[in, out] output_data   Output data pointer. Data type: int16
 * @return     The function returns <code>ARM_MATH_SUCCESS</code>
 *
 * @details
 *    - Supported framework: TensorFlow Lite
 *    - q15 is used as the data type even though it is s16 data; this is done to be consistent with existing APIs.
 */
arm_status arm_fully_connected_s16(const cmsis_nn_context *ctx,
                                   const cmsis_nn_fc_params *fc_params,
                                   const cmsis_nn_per_tensor_quant_params *quant_params,
                                   const cmsis_nn_dims *input_dims,
                                   const q15_t *input_data,
                                   const cmsis_nn_dims *filter_dims,
                                   const q7_t *filter_data,
                                   const cmsis_nn_dims *bias_dims,
                                   const int64_t *bias_data,
                                   const cmsis_nn_dims *output_dims,
                                   q15_t *output_data);
/**
 * @brief Get the required buffer size for S16 basic fully-connected and
 *        matrix multiplication layer function for TF Lite
 * @param[in]      filter_dims   dimension of filter
 * @return         The function returns required buffer size in bytes
 *
 */
int32_t arm_fully_connected_s16_get_buffer_size(const cmsis_nn_dims *filter_dims);
/**
 * @brief Q7 opt fully-connected layer function
 * @param[in]       pV          pointer to input vector
 * @param[in]       pM          pointer to matrix weights
 * @param[in]       dim_vec     length of the vector
 * @param[in]       num_of_rows number of rows in weight matrix
 * @param[in]       bias_shift  amount of left-shift for bias
 * @param[in]       out_shift   amount of right-shift for output
 * @param[in]       bias        pointer to bias
 * @param[in,out]   pOut        pointer to output vector
 * @param[in,out]   vec_buffer  pointer to buffer space for input
 * @return     The function returns <code>ARM_MATH_SUCCESS</code>
 *
 */
arm_status arm_fully_connected_q7_opt(const q7_t *pV,
                                      const q7_t *pM,
                                      const uint16_t dim_vec,
                                      const uint16_t num_of_rows,
                                      const uint16_t bias_shift,
                                      const uint16_t out_shift,
                                      const q7_t *bias,
                                      q7_t *pOut,
                                      q15_t *vec_buffer);
/**
 * @brief Q15 basic fully-connected layer function
 * @param[in]       pV          pointer to input vector
 * @param[in]       pM          pointer to matrix weights
 * @param[in]       dim_vec     length of the vector
 * @param[in]       num_of_rows number of rows in weight matrix
 * @param[in]       bias_shift  amount of left-shift for bias
 * @param[in]       out_shift   amount of right-shift for output
 * @param[in]       bias        pointer to bias
 * @param[in,out]   pOut        pointer to output vector
 * @param[in,out]   vec_buffer  pointer to buffer space for input
 * @return     The function returns <code>ARM_MATH_SUCCESS</code>
 *
 */
arm_status arm_fully_connected_q15(const q15_t *pV,
                                   const q15_t *pM,
                                   const uint16_t dim_vec,
                                   const uint16_t num_of_rows,
                                   const uint16_t bias_shift,
                                   const uint16_t out_shift,
                                   const q15_t *bias,
                                   q15_t *pOut,
                                   q15_t *vec_buffer);
/**
 * @brief Q15 opt fully-connected layer function
 * @param[in]       pV          pointer to input vector
 * @param[in]       pM          pointer to matrix weights
 * @param[in]       dim_vec     length of the vector
 * @param[in]       num_of_rows number of rows in weight matrix
 * @param[in]       bias_shift  amount of left-shift for bias
 * @param[in]       out_shift   amount of right-shift for output
 * @param[in]       bias        pointer to bias
 * @param[in,out]   pOut        pointer to output vector
 * @param[in,out]   vec_buffer  pointer to buffer space for input
 * @return     The function returns <code>ARM_MATH_SUCCESS</code>
 *
 */
arm_status arm_fully_connected_q15_opt(const q15_t *pV,
                                       const q15_t *pM,
                                       const uint16_t dim_vec,
                                       const uint16_t num_of_rows,
                                       const uint16_t bias_shift,
                                       const uint16_t out_shift,
                                       const q15_t *bias,
                                       q15_t *pOut,
                                       q15_t *vec_buffer);
/**
 * @brief Mixed Q15-Q7 fully-connected layer function
 * @param[in]       pV          pointer to input vector
 * @param[in]       pM          pointer to matrix weights
 * @param[in]       dim_vec     length of the vector
 * @param[in]       num_of_rows number of rows in weight matrix
 * @param[in]       bias_shift  amount of left-shift for bias
 * @param[in]       out_shift   amount of right-shift for output
 * @param[in]       bias        pointer to bias
 * @param[in,out]   pOut        pointer to output vector
 * @param[in,out]   vec_buffer  pointer to buffer space for input
 * @return     The function returns <code>ARM_MATH_SUCCESS</code>
 *
 */
arm_status arm_fully_connected_mat_q7_vec_q15(const q15_t *pV,
                                              const q7_t *pM,
                                              const uint16_t dim_vec,
                                              const uint16_t num_of_rows,
                                              const uint16_t bias_shift,
                                              const uint16_t out_shift,
                                              const q7_t *bias,
                                              q15_t *pOut,
                                              q15_t *vec_buffer);
/**
 * @brief Mixed Q15-Q7 opt fully-connected layer function
 * @param[in]       pV          pointer to input vector
 * @param[in]       pM          pointer to matrix weights
 * @param[in]       dim_vec     length of the vector
 * @param[in]       num_of_rows number of rows in weight matrix
 * @param[in]       bias_shift  amount of left-shift for bias
 * @param[in]       out_shift   amount of right-shift for output
 * @param[in]       bias        pointer to bias
 * @param[in,out]   pOut        pointer to output vector
 * @param[in,out]   vec_buffer  pointer to buffer space for input
 * @return     The function returns <code>ARM_MATH_SUCCESS</code>
 *
 */
arm_status arm_fully_connected_mat_q7_vec_q15_opt(const q15_t *pV,
                                                  const q7_t *pM,
                                                  const uint16_t dim_vec,
                                                  const uint16_t num_of_rows,
                                                  const uint16_t bias_shift,
                                                  const uint16_t out_shift,
                                                  const q7_t *bias,
                                                  q15_t *pOut,
                                                  q15_t *vec_buffer);
/**
 * @brief Matrix-Multiplication Kernels for Convolution
 *
 * These functions are used within convolution layer functions for
 * matrix multiplication.
 *
 * The implementation is similar to the CMSIS-DSP arm_mat_mult functions
 * with one Q7 and one Q15 operand. The Q15 operand is the im2col
 * output, which always has 2 columns.
 *
 */
/**
 * @brief Matrix-multiplication function for convolution
 * @param[in]       pA          pointer to operand A
 * @param[in]       pInBuffer   pointer to operand B, always consists of 2 vectors
 * @param[in]       ch_im_out   numRow of A
 * @param[in]       numCol_A    numCol of A
 * @param[in]       bias_shift  amount of left-shift for bias
 * @param[in]       out_shift   amount of right-shift for output
 * @param[in]       bias        the bias
 * @param[in,out]   pOut        pointer to output
 * @return     The function returns the incremented output pointer
 */
q7_t *arm_nn_mat_mult_kernel_q7_q15(const q7_t *pA,
                                    const q15_t *pInBuffer,
                                    const uint16_t ch_im_out,
                                    const uint16_t numCol_A,
                                    const uint16_t bias_shift,
                                    const uint16_t out_shift,
                                    const q7_t *bias,
                                    q7_t *pOut);
#ifdef __cplusplus
}
#endif

/*
 * Other functions
 * These layers are typically not timing critical
 * Basic implementation is supported here
 */

#ifdef __cplusplus
extern "C" {
#endif

/**
 * @defgroup BasicMath Basic math functions
 *
 * Elementwise add and multiplication functions.
 *
 */
/**
 * @brief s8 elementwise add of two vectors
 * @param[in]       input_1_vect        pointer to input vector 1
 * @param[in]       input_2_vect        pointer to input vector 2
 * @param[in]       input_1_offset      offset for input 1. Range: -127 to 128
 * @param[in]       input_1_mult        multiplier for input 1
 * @param[in]       input_1_shift       shift for input 1
 * @param[in]       input_2_offset      offset for input 2. Range: -127 to 128
 * @param[in]       input_2_mult        multiplier for input 2
 * @param[in]       input_2_shift       shift for input 2
 * @param[in]       left_shift          input left shift
 * @param[in,out]   output              pointer to output vector
 * @param[in]       out_offset          output offset. Range: -128 to 127
 * @param[in]       out_mult            output multiplier
 * @param[in]       out_shift           output shift
 * @param[in]       out_activation_min  minimum value to clamp output to. Min: -128
 * @param[in]       out_activation_max  maximum value to clamp output to. Max: 127
 * @param[in]       block_size          number of samples
 * @return          The function returns ARM_MATH_SUCCESS
 */
arm_status arm_elementwise_add_s8(const int8_t *input_1_vect,
                                  const int8_t *input_2_vect,
                                  const int32_t input_1_offset,
                                  const int32_t input_1_mult,
                                  const int32_t input_1_shift,
                                  const int32_t input_2_offset,
                                  const int32_t input_2_mult,
                                  const int32_t input_2_shift,
                                  const int32_t left_shift,
                                  int8_t *output,
                                  const int32_t out_offset,
                                  const int32_t out_mult,
                                  const int32_t out_shift,
                                  const int32_t out_activation_min,
                                  const int32_t out_activation_max,
                                  const int32_t block_size);
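
/*
 * Usage sketch (editor's illustration, not part of the original header):
 * adding two already-quantized s8 vectors. The offsets, multipliers and
 * shifts below are placeholder values; in practice they come from the
 * framework's requantization parameters.
 *
 *   #define BLOCK 64
 *   static int8_t a[BLOCK], b[BLOCK], out[BLOCK];
 *
 *   arm_elementwise_add_s8(a, b,
 *                          0, 1073741824, -1,    // input 1: offset, mult, shift (assumed)
 *                          0, 1073741824, -1,    // input 2: offset, mult, shift (assumed)
 *                          20,                   // left_shift (assumed)
 *                          out,
 *                          0, 1073741824, -19,   // output: offset, mult, shift (assumed)
 *                          -128, 127,            // activation clamp range
 *                          BLOCK);
 */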
/**
 * @brief s16 elementwise add of two vectors
 * @param[in]       input_1_vect        pointer to input vector 1
 * @param[in]       input_2_vect        pointer to input vector 2
 * @param[in]       input_1_offset      offset for input 1. Not used.
 * @param[in]       input_1_mult        multiplier for input 1
 * @param[in]       input_1_shift       shift for input 1
 * @param[in]       input_2_offset      offset for input 2. Not used.
 * @param[in]       input_2_mult        multiplier for input 2
 * @param[in]       input_2_shift       shift for input 2
 * @param[in]       left_shift          input left shift
 * @param[in,out]   output              pointer to output vector
 * @param[in]       out_offset          output offset. Not used.
 * @param[in]       out_mult            output multiplier
 * @param[in]       out_shift           output shift
 * @param[in]       out_activation_min  minimum value to clamp output to. Min: -32768
 * @param[in]       out_activation_max  maximum value to clamp output to. Max: 32767
 * @param[in]       block_size          number of samples
 * @return          The function returns ARM_MATH_SUCCESS
 */
arm_status arm_elementwise_add_s16(const int16_t *input_1_vect,
                                   const int16_t *input_2_vect,
                                   const int32_t input_1_offset,
                                   const int32_t input_1_mult,
                                   const int32_t input_1_shift,
                                   const int32_t input_2_offset,
                                   const int32_t input_2_mult,
                                   const int32_t input_2_shift,
                                   const int32_t left_shift,
                                   int16_t *output,
                                   const int32_t out_offset,
                                   const int32_t out_mult,
                                   const int32_t out_shift,
                                   const int32_t out_activation_min,
                                   const int32_t out_activation_max,
                                   const int32_t block_size);
/**
 * @brief s8 elementwise multiplication
 * @param[in]       input_1_vect        pointer to input vector 1
 * @param[in]       input_2_vect        pointer to input vector 2
 * @param[in]       input_1_offset      offset for input 1. Range: -127 to 128
 * @param[in]       input_2_offset      offset for input 2. Range: -127 to 128
 * @param[in,out]   output              pointer to output vector
 * @param[in]       out_offset          output offset. Range: -128 to 127
 * @param[in]       out_mult            output multiplier
 * @param[in]       out_shift           output shift
 * @param[in]       out_activation_min  minimum value to clamp output to. Min: -128
 * @param[in]       out_activation_max  maximum value to clamp output to. Max: 127
 * @param[in]       block_size          number of samples
 * @return          The function returns ARM_MATH_SUCCESS
 *
 * @details   Supported framework: TensorFlow Lite micro
 */
arm_status arm_elementwise_mul_s8(const int8_t *input_1_vect,
                                  const int8_t *input_2_vect,
                                  const int32_t input_1_offset,
                                  const int32_t input_2_offset,
                                  int8_t *output,
                                  const int32_t out_offset,
                                  const int32_t out_mult,
                                  const int32_t out_shift,
                                  const int32_t out_activation_min,
                                  const int32_t out_activation_max,
                                  const int32_t block_size);
/**
 * @brief s16 elementwise multiplication
 * @param[in]       input_1_vect        pointer to input vector 1
 * @param[in]       input_2_vect        pointer to input vector 2
 * @param[in]       input_1_offset      offset for input 1. Not used.
 * @param[in]       input_2_offset      offset for input 2. Not used.
 * @param[in,out]   output              pointer to output vector
 * @param[in]       out_offset          output offset. Not used.
 * @param[in]       out_mult            output multiplier
 * @param[in]       out_shift           output shift
 * @param[in]       out_activation_min  minimum value to clamp output to. Min: -32768
 * @param[in]       out_activation_max  maximum value to clamp output to. Max: 32767
 * @param[in]       block_size          number of samples
 * @return          The function returns ARM_MATH_SUCCESS
 *
 * @details   Supported framework: TensorFlow Lite micro
 */
arm_status arm_elementwise_mul_s16(const int16_t *input_1_vect,
                                   const int16_t *input_2_vect,
                                   const int32_t input_1_offset,
                                   const int32_t input_2_offset,
                                   int16_t *output,
                                   const int32_t out_offset,
                                   const int32_t out_mult,
                                   const int32_t out_shift,
                                   const int32_t out_activation_min,
                                   const int32_t out_activation_max,
                                   const int32_t block_size);
/**
 * @defgroup Acti Activation Functions
 *
 * Perform activation layers, including ReLU (Rectified Linear Unit),
 * sigmoid and tanh
 *
 */

/**
 * @brief Q7 RELU function
 * @param[in,out]   data        pointer to input
 * @param[in]       size        number of elements
 * @return none.
 */
void arm_relu_q7(q7_t *data, uint16_t size);

/**
 * @brief s8 ReLU6 function
 * @param[in,out]   data        pointer to input
 * @param[in]       size        number of elements
 */
void arm_relu6_s8(q7_t *data, uint16_t size);
/**
 * @brief Q15 RELU function
 * @param[in,out]   data        pointer to input
 * @param[in]       size        number of elements
 * @return none.
 */
void arm_relu_q15(q15_t *data, uint16_t size);

/**
 * @brief Q7 neural network activation function using direct table look-up
 * @param[in,out]   data        pointer to input
 * @param[in]       size        number of elements
 * @param[in]       int_width   bit-width of the integer part, assumed to be smaller than 3
 * @param[in]       type        type of activation functions
 * @return none.
 */
void arm_nn_activations_direct_q7(q7_t *data, uint16_t size, uint16_t int_width, arm_nn_activation_type type);
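
/*
 * Usage sketch (editor's illustration, not part of the original header):
 * applying an in-place sigmoid via the Q7 table look-up. The buffer size and
 * int_width are assumptions; ARM_SIGMOID refers to the arm_nn_activation_type
 * selector declared earlier in this header.
 *
 *   static q7_t act[128];                              // Q7 activations, updated in place
 *   arm_nn_activations_direct_q7(act, 128, 3, ARM_SIGMOID);
 */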
/**
 * @brief Q15 neural network activation function using direct table look-up
 * @param[in,out]   data        pointer to input
 * @param[in]       size        number of elements
 * @param[in]       int_width   bit-width of the integer part, assumed to be smaller than 3
 * @param[in]       type        type of activation functions
 * @return none.
 *
 * @details
 *
 * This is the direct table look-up approach.
 *
 * The integer part of the fixed-point number is assumed here to be <= 3.
 * Using more than 3 bits makes little sense, as saturation followed by any
 * of these activation functions yields the same result.
 */
void arm_nn_activations_direct_q15(q15_t *data, uint16_t size, uint16_t int_width, arm_nn_activation_type type);
/**
 * @defgroup Pooling Pooling Functions
 *
 * Perform pooling functions, including max pooling and average pooling
 *
 */

/**
 * @brief Q7 max pooling function
 * @param[in]       Im_in       pointer to input tensor
 * @param[in]       dim_im_in   input tensor dimension
 * @param[in]       ch_im_in    number of input tensor channels
 * @param[in]       dim_kernel  filter kernel size
 * @param[in]       padding     padding sizes
 * @param[in]       stride      convolution stride
 * @param[in]       dim_im_out  output tensor dimension
 * @param[in,out]   bufferA     pointer to buffer space for input
 * @param[in,out]   Im_out      pointer to output tensor
 * @return none.
 *
 */
void arm_maxpool_q7_HWC(q7_t *Im_in,
                        const uint16_t dim_im_in,
                        const uint16_t ch_im_in,
                        const uint16_t dim_kernel,
                        const uint16_t padding,
                        const uint16_t stride,
                        const uint16_t dim_im_out,
                        q7_t *bufferA,
                        q7_t *Im_out);
/**
 * @brief Q7 average pooling function
 * @param[in]       Im_in       pointer to input tensor
 * @param[in]       dim_im_in   input tensor dimension
 * @param[in]       ch_im_in    number of input tensor channels
 * @param[in]       dim_kernel  filter kernel size
 * @param[in]       padding     padding sizes
 * @param[in]       stride      convolution stride
 * @param[in]       dim_im_out  output tensor dimension
 * @param[in,out]   bufferA     pointer to buffer space for input
 * @param[in,out]   Im_out      pointer to output tensor
 * @return none.
 *
 */
void arm_avepool_q7_HWC(q7_t *Im_in,
                        const uint16_t dim_im_in,
                        const uint16_t ch_im_in,
                        const uint16_t dim_kernel,
                        const uint16_t padding,
                        const uint16_t stride,
                        const uint16_t dim_im_out,
                        q7_t *bufferA,
                        q7_t *Im_out);
/**
 * @brief s8 average pooling function.
 *
 * @param[in, out] ctx          Function context (e.g. temporary buffer). Check the function
 *                              definition file to see if an additional buffer is required.
 *                              Optional function {API}_get_buffer_size() provides the buffer
 *                              size if an additional buffer is required.
 * @param[in]      pool_params  Pooling parameters
 * @param[in]      input_dims   Input (activation) tensor dimensions. Format: [H, W, C_IN]
 *                              Argument 'N' is not used.
 * @param[in]      input_data   Input (activation) data pointer. Data type: int8
 * @param[in]      filter_dims  Filter tensor dimensions. Format: [H, W]
 *                              Arguments N and C are not used.
 * @param[in]      output_dims  Output tensor dimensions. Format: [H, W, C_OUT]
 *                              Argument N is not used.
 *                              C_OUT equals C_IN.
 * @param[in, out] output_data  Output data pointer. Data type: int8
 * @return         The function returns
 *                 <code>ARM_MATH_SUCCESS</code> - Successful operation
 *
 * @details
 *    - Supported Framework: TensorFlow Lite
 *
 */
arm_status arm_avgpool_s8(const cmsis_nn_context *ctx,
                          const cmsis_nn_pool_params *pool_params,
                          const cmsis_nn_dims *input_dims,
                          const q7_t *input_data,
                          const cmsis_nn_dims *filter_dims,
                          const cmsis_nn_dims *output_dims,
                          q7_t *output_data);
/**
 * @brief Get the required buffer size for S8 average pooling function
 * @param[in]       dim_dst_width  output tensor dimension
 * @param[in]       ch_src         number of input tensor channels
 * @return          The function returns required buffer size in bytes
 *
 */
int32_t arm_avgpool_s8_get_buffer_size(const int dim_dst_width, const int ch_src);
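
/*
 * Usage sketch (editor's illustration, not part of the original header): s8
 * average pooling with its buffer helper. The pool geometry and the heap
 * allocation are assumptions; the dims structs are assumed to be filled in
 * beforehand.
 *
 *   cmsis_nn_context ctx;
 *   ctx.size = arm_avgpool_s8_get_buffer_size(output_dims.w, input_dims.c);
 *   ctx.buf  = (ctx.size > 0) ? malloc(ctx.size) : NULL;
 *
 *   cmsis_nn_pool_params pool_params;
 *   pool_params.stride.w  = 2;  pool_params.stride.h  = 2;   // assumed
 *   pool_params.padding.w = 0;  pool_params.padding.h = 0;   // assumed
 *   pool_params.activation.min = -128;
 *   pool_params.activation.max = 127;
 *
 *   arm_status status = arm_avgpool_s8(&ctx, &pool_params, &input_dims, input_data,
 *                                      &filter_dims, &output_dims, output_data);
 *   free(ctx.buf);
 */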
/**
 * @brief s16 average pooling function.
 *
 * @param[in, out] ctx          Function context (e.g. temporary buffer). Check the function
 *                              definition file to see if an additional buffer is required.
 *                              Optional function {API}_get_buffer_size() provides the buffer
 *                              size if an additional buffer is required.
 * @param[in]      pool_params  Pooling parameters
 * @param[in]      input_dims   Input (activation) tensor dimensions. Format: [H, W, C_IN]
 *                              Argument 'N' is not used.
 * @param[in]      input_data   Input (activation) data pointer. Data type: int16
 * @param[in]      filter_dims  Filter tensor dimensions. Format: [H, W]
 *                              Arguments N and C are not used.
 * @param[in]      output_dims  Output tensor dimensions. Format: [H, W, C_OUT]
 *                              Argument N is not used.
 *                              C_OUT equals C_IN.
 * @param[in, out] output_data  Output data pointer. Data type: int16
 * @return         The function returns
 *                 <code>ARM_MATH_SUCCESS</code> - Successful operation
 *
 * @details
 *    - Supported Framework: TensorFlow Lite
 *
 */
arm_status arm_avgpool_s16(const cmsis_nn_context *ctx,
                           const cmsis_nn_pool_params *pool_params,
                           const cmsis_nn_dims *input_dims,
                           const int16_t *input_data,
                           const cmsis_nn_dims *filter_dims,
                           const cmsis_nn_dims *output_dims,
                           int16_t *output_data);

/**
 * @brief Get the required buffer size for S16 average pooling function
 * @param[in]       dim_dst_width  output tensor dimension
 * @param[in]       ch_src         number of input tensor channels
 * @return          The function returns required buffer size in bytes
 *
 */
int32_t arm_avgpool_s16_get_buffer_size(const int dim_dst_width, const int ch_src);
/**
 * @brief s8 max pooling function.
 *
 * @param[in, out] ctx          Function context (e.g. temporary buffer). Check the function
 *                              definition file to see if an additional buffer is required.
 *                              Optional function {API}_get_buffer_size() provides the buffer
 *                              size if an additional buffer is required.
 * @param[in]      pool_params  Pooling parameters
 * @param[in]      input_dims   Input (activation) tensor dimensions. Format: [H, W, C_IN]
 *                              Argument 'N' is not used.
 * @param[in]      input_data   Input (activation) data pointer. The input tensor must not
 *                              overlap with the output tensor. Data type: int8
 * @param[in]      filter_dims  Filter tensor dimensions. Format: [H, W]
 *                              Arguments N and C are not used.
 * @param[in]      output_dims  Output tensor dimensions. Format: [H, W, C_OUT]
 *                              Argument N is not used.
 *                              C_OUT equals C_IN.
 * @param[in, out] output_data  Output data pointer. Data type: int8
 * @return         The function returns
 *                 <code>ARM_MATH_SUCCESS</code> - Successful operation
 *
 * @details
 *    - Supported Framework: TensorFlow Lite
 *
 */
arm_status arm_max_pool_s8(const cmsis_nn_context *ctx,
                           const cmsis_nn_pool_params *pool_params,
                           const cmsis_nn_dims *input_dims,
                           const q7_t *input_data,
                           const cmsis_nn_dims *filter_dims,
                           const cmsis_nn_dims *output_dims,
                           q7_t *output_data);
/**
 * @brief s16 max pooling function.
 *
 * @param[in, out] ctx          Function context (e.g. temporary buffer). Check the function
 *                              definition file to see if an additional buffer is required.
 *                              Optional function {API}_get_buffer_size() provides the buffer
 *                              size if an additional buffer is required.
 * @param[in]      pool_params  Pooling parameters
 * @param[in]      input_dims   Input (activation) tensor dimensions. Format: [H, W, C_IN]
 *                              Argument 'N' is not used.
 * @param[in]      src          Input (activation) data pointer. The input tensor must not
 *                              overlap with the output tensor. Data type: int16
 * @param[in]      filter_dims  Filter tensor dimensions. Format: [H, W]
 *                              Arguments N and C are not used.
 * @param[in]      output_dims  Output tensor dimensions. Format: [H, W, C_OUT]
 *                              Argument N is not used.
 *                              C_OUT equals C_IN.
 * @param[in, out] dst          Output data pointer. Data type: int16
 * @return         The function returns
 *                 <code>ARM_MATH_SUCCESS</code> - Successful operation
 *
 * @details
 *    - Supported Framework: TensorFlow Lite
 *
 */
arm_status arm_max_pool_s16(const cmsis_nn_context *ctx,
                            const cmsis_nn_pool_params *pool_params,
                            const cmsis_nn_dims *input_dims,
                            const int16_t *src,
                            const cmsis_nn_dims *filter_dims,
                            const cmsis_nn_dims *output_dims,
                            int16_t *dst);
/**
 * @defgroup Softmax Softmax Functions
 *
 * EXP(2) based softmax functions.
 *
 */

/**
 * @brief Q7 softmax function
 * @param[in]       vec_in      pointer to input vector
 * @param[in]       dim_vec     input vector dimension
 * @param[out]      p_out       pointer to output vector
 *
 * @note This function is an optimized version which is not bit-accurate with
 *       TensorFlow Lite's kernel
 *
 */
void arm_softmax_q7(const q7_t *vec_in, const uint16_t dim_vec, q7_t *p_out);
/**
 * @brief Q7 softmax function with batch parameter
 * @param[in]       vec_in      pointer to input vector
 * @param[in]       nb_batches  number of batches
 * @param[in]       dim_vec     input vector dimension
 * @param[out]      p_out       pointer to output vector
 * @return none.
 *
 * @note This function is an optimized version which is not bit-accurate with
 *       TensorFlow Lite's kernel
 *
 */
void arm_softmax_with_batch_q7(const q7_t *vec_in, const uint16_t nb_batches, const uint16_t dim_vec, q7_t *p_out);

/**
 * @brief Q15 softmax function
 * @param[in]       vec_in      pointer to input vector
 * @param[in]       dim_vec     input vector dimension
 * @param[out]      p_out       pointer to output vector
 * @return none.
 *
 * @note This function is an optimized version which is not bit-accurate with
 *       TensorFlow Lite's kernel
 *
 */
void arm_softmax_q15(const q15_t *vec_in, const uint16_t dim_vec, q15_t *p_out);
/**
 * @brief S8 softmax function
 * @param[in]  input     Pointer to the input tensor
 * @param[in]  num_rows  Number of rows in the input tensor
 * @param[in]  row_size  Number of elements in each input row
 * @param[in]  mult      Input quantization multiplier
 * @param[in]  shift     Input quantization shift within the range [0, 31]
 * @param[in]  diff_min  Minimum difference with max in row. Used to check if
 *                       the quantized exponential operation can be performed
 * @param[out] output    Pointer to the output tensor
 *
 * @note Supported framework: TensorFlow Lite micro (bit-accurate)
 *
 */
void arm_softmax_s8(const int8_t *input,
                    const int32_t num_rows,
                    const int32_t row_size,
                    const int32_t mult,
                    const int32_t shift,
                    const int32_t diff_min,
                    int8_t *output);
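
/*
 * Usage sketch (editor's illustration, not part of the original header):
 * softmax over each row of an s8 tensor. The mult, shift and diff_min values
 * below are placeholders; real values are derived from the input scale by
 * the framework's quantization tooling.
 *
 *   #define ROWS 2
 *   #define COLS 10
 *   static int8_t logits[ROWS * COLS];
 *   static int8_t probs[ROWS * COLS];
 *
 *   arm_softmax_s8(logits, ROWS, COLS,
 *                  1077952576,    // mult (assumed)
 *                  23,            // shift (assumed)
 *                  -248,          // diff_min (assumed)
 *                  probs);
 */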
/**
 * @brief S8 to s16 softmax function
 * @param[in]  input     Pointer to the input tensor
 * @param[in]  num_rows  Number of rows in the input tensor
 * @param[in]  row_size  Number of elements in each input row
 * @param[in]  mult      Input quantization multiplier
 * @param[in]  shift     Input quantization shift within the range [0, 31]
 * @param[in]  diff_min  Minimum difference with max in row. Used to check if
 *                       the quantized exponential operation can be performed
 * @param[out] output    Pointer to the output tensor
 *
 * @note Supported framework: TensorFlow Lite micro (bit-accurate)
 *
 */
void arm_softmax_s8_s16(const int8_t *input,
                        const int32_t num_rows,
                        const int32_t row_size,
                        const int32_t mult,
                        const int32_t shift,
                        const int32_t diff_min,
                        int16_t *output);
/**
 * @brief S16 softmax function
 * @param[in]  input          Pointer to the input tensor
 * @param[in]  num_rows       Number of rows in the input tensor
 * @param[in]  row_size       Number of elements in each input row
 * @param[in]  mult           Input quantization multiplier
 * @param[in]  shift          Input quantization shift within the range [0, 31]
 * @param[in]  softmax_params Softmax s16 layer parameters with two pointers to the LUTs specified below.
 *                            The upper 9 bits are used for indexing and the remaining 7 bits for interpolation.
 *                            That means 512 entries for the 9-bit indexing plus 1 extra for interpolation,
 *                            i.e. 513 values for each LUT.
 *                            - Lookup table for exp(x), where x is uniformly distributed over [-10.0, 0.0]
 *                            - Lookup table for 1 / (1 + x), where x is uniformly distributed over [0.0, 1.0]
 * @param[out] output         Pointer to the output tensor
 * @return The function returns
 *         <code>ARM_MATH_ARGUMENT_ERROR</code> if LUTs are NULL
 *         <code>ARM_MATH_SUCCESS</code> - Successful operation
 *
 * @note Supported framework: TensorFlow Lite micro (bit-accurate)
 *
 */
arm_status arm_softmax_s16(const int16_t *input,
                           const int32_t num_rows,
                           const int32_t row_size,
                           const int32_t mult,
                           const int32_t shift,
                           const cmsis_nn_softmax_lut_s16 *softmax_params,
                           int16_t *output);
/**
 * @brief U8 softmax function
 * @param[in]  input     Pointer to the input tensor
 * @param[in]  num_rows  Number of rows in the input tensor
 * @param[in]  row_size  Number of elements in each input row
 * @param[in]  mult      Input quantization multiplier
 * @param[in]  shift     Input quantization shift within the range [0, 31]
 * @param[in]  diff_min  Minimum difference with max in row. Used to check if
 *                       the quantized exponential operation can be performed
 * @param[out] output    Pointer to the output tensor
 *
 * @note Supported framework: TensorFlow Lite micro (bit-accurate)
 *
 */
void arm_softmax_u8(const uint8_t *input,
                    const int32_t num_rows,
                    const int32_t row_size,
                    const int32_t mult,
                    const int32_t shift,
                    const int32_t diff_min,
                    uint8_t *output);
/**
 * @brief uint8 depthwise convolution function with asymmetric quantization
 *        Unless specified otherwise, arguments are mandatory.
 *
 * @param[in]     input           Pointer to input tensor
 * @param[in]     input_x         Width of input tensor
 * @param[in]     input_y         Height of input tensor
 * @param[in]     input_ch        Channels in input tensor
 * @param[in]     kernel          Pointer to kernel weights
 * @param[in]     kernel_x        Width of kernel
 * @param[in]     kernel_y        Height of kernel
 * @param[in]     ch_mult         Channel multiplier (number of output channels per input channel)
 * @param[in]     pad_x           Padding size x
 * @param[in]     pad_y           Padding size y
 * @param[in]     stride_x        Stride along the width
 * @param[in]     stride_y        Stride along the height
 * @param[in]     dilation_x      Dilation along width. Not used and intended for future enhancement.
 * @param[in]     dilation_y      Dilation along height. Not used and intended for future enhancement.
 * @param[in]     bias            Pointer to optional bias values. If no bias is
 *                                available, NULL is expected
 * @param[in]     input_offset    Input tensor zero offset
 * @param[in]     filter_offset   Kernel tensor zero offset
 * @param[in]     output_offset   Output tensor zero offset
 * @param[in,out] output          Pointer to output tensor
 * @param[in]     output_x        Width of output tensor
 * @param[in]     output_y        Height of output tensor
 * @param[in]     output_activation_min  Minimum value to clamp the output to. Range : {0, 255}
 * @param[in]     output_activation_max  Maximum value to clamp the output to. Range : {0, 255}
 * @param[in]     out_shift       Amount of right-shift for output
 * @param[in]     out_mult        Output multiplier for requantization
 * @return        The function returns the following
 *                <code>ARM_MATH_SUCCESS</code> - Successful operation
 *
 */
arm_status arm_depthwise_conv_u8_basic_ver1(const uint8_t *input,
                                            const uint16_t input_x,
                                            const uint16_t input_y,
                                            const uint16_t input_ch,
                                            const uint8_t *kernel,
                                            const uint16_t kernel_x,
                                            const uint16_t kernel_y,
                                            const int16_t ch_mult,
                                            const int16_t pad_x,
                                            const int16_t pad_y,
                                            const int16_t stride_x,
                                            const int16_t stride_y,
                                            const int16_t dilation_x,
                                            const int16_t dilation_y,
                                            const int32_t *bias,
                                            const int32_t input_offset,
                                            const int32_t filter_offset,
                                            const int32_t output_offset,
                                            uint8_t *output,
                                            const uint16_t output_x,
                                            const uint16_t output_y,
                                            const int32_t output_activation_min,
                                            const int32_t output_activation_max,
                                            const int32_t out_shift,
                                            const int32_t out_mult);
/**
 * @defgroup Reshape Reshape Functions
 *
 */

/**
 * @brief Reshape a s8 vector into another with a different shape
 * @param[in]  input      points to the s8 input vector
 * @param[out] output     points to the s8 output vector
 * @param[in]  total_size total size of the input and output vectors in bytes
 *
 * @note The output is expected to be in a memory area that does not overlap with the input's
 *
 */
void arm_reshape_s8(const int8_t *input, int8_t *output, const uint32_t total_size);
/**
 * @defgroup Concatenation Concatenation Functions
 *
 */

/**
 * @brief int8/uint8 concatenation function to be used for concatenating N-tensors along the X axis
 *        This function should be called for each input tensor to concatenate. The argument offset_x
 *        will be used to store the input tensor in the correct position in the output tensor
 *        (a concrete sketch follows the prototype below), i.e.
 *
 *            offset_x = 0;
 *            for (i = 0; i < num_input_tensors; ++i)
 *            {
 *                arm_concatenation_s8_x(&input[i], ..., &output, ..., ..., offset_x);
 *                offset_x += input_x[i];
 *            }
 *
 *        This function assumes that the output tensor has:
 *        -# The same height as the input tensor
 *        -# The same number of channels as the input tensor
 *        -# The same batch size as the input tensor
 *
 *        Unless specified otherwise, arguments are mandatory.
 *
 * @note This function is data-layout independent and can be used to concatenate either int8 or uint8 tensors
 *       because it does not involve any arithmetic operation
 *
 * @param[in]  input    Pointer to input tensor. Input tensor must not overlap with the output tensor.
 * @param[in]  input_x  Width of input tensor
 * @param[in]  input_y  Height of input tensor
 * @param[in]  input_z  Channels in input tensor
 * @param[in]  input_w  Batch size in input tensor
 * @param[out] output   Pointer to output tensor. Expected to be at least
 *                      (input_x * input_y * input_z * input_w) + offset_x
 *                      bytes.
 * @param[in]  output_x Width of output tensor
 * @param[in]  offset_x The offset (in number of elements) on the X axis to start concatenating the input tensor
 *                      It is the user's responsibility to provide the correct value
 *
 * <b> Input constraints</b>
 * offset_x is less than output_x
 *
 */
void arm_concatenation_s8_x(const int8_t *input,
                            const uint16_t input_x,
                            const uint16_t input_y,
                            const uint16_t input_z,
                            const uint16_t input_w,
                            int8_t *output,
                            const uint16_t output_x,
                            const uint32_t offset_x);
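
/*
 * Concrete sketch (editor's illustration, not part of the original header) of
 * the loop described above, concatenating two tensors along X. The dimensions
 * are assumptions chosen for illustration.
 *
 *   static int8_t t0[4 * 2 * 3 * 1];           // x=4, y=2, z=3, w=1
 *   static int8_t t1[6 * 2 * 3 * 1];           // x=6, same y/z/w
 *   static int8_t out[(4 + 6) * 2 * 3 * 1];    // output width is 4 + 6 = 10
 *
 *   uint32_t offset_x = 0;
 *   arm_concatenation_s8_x(t0, 4, 2, 3, 1, out, 10, offset_x);
 *   offset_x += 4;
 *   arm_concatenation_s8_x(t1, 6, 2, 3, 1, out, 10, offset_x);
 */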
/**
 * @brief int8/uint8 concatenation function to be used for concatenating N-tensors along the Y axis
 *        This function should be called for each input tensor to concatenate. The argument offset_y
 *        will be used to store the input tensor in the correct position in the output tensor, i.e.
 *
 *            offset_y = 0;
 *            for (i = 0; i < num_input_tensors; ++i)
 *            {
 *                arm_concatenation_s8_y(&input[i], ..., &output, ..., ..., offset_y);
 *                offset_y += input_y[i];
 *            }
 *
 *        This function assumes that the output tensor has:
 *        -# The same width as the input tensor
 *        -# The same number of channels as the input tensor
 *        -# The same batch size as the input tensor
 *
 *        Unless specified otherwise, arguments are mandatory.
 *
 * @note This function is data-layout independent and can be used to concatenate either int8 or uint8 tensors
 *       because it does not involve any arithmetic operation
 *
 * @param[in]  input    Pointer to input tensor. Input tensor must not overlap with the output tensor.
 * @param[in]  input_x  Width of input tensor
 * @param[in]  input_y  Height of input tensor
 * @param[in]  input_z  Channels in input tensor
 * @param[in]  input_w  Batch size in input tensor
 * @param[out] output   Pointer to output tensor. Expected to be at least
 *                      (input_z * input_w * input_x * input_y) + offset_y
 *                      bytes.
 * @param[in]  output_y Height of output tensor
 * @param[in]  offset_y The offset on the Y axis to start concatenating the input tensor
 *                      It is the user's responsibility to provide the correct value
 *
 * <b> Input constraints</b>
 * offset_y is less than output_y
 *
 */
void arm_concatenation_s8_y(const int8_t *input,
                            const uint16_t input_x,
                            const uint16_t input_y,
                            const uint16_t input_z,
                            const uint16_t input_w,
                            int8_t *output,
                            const uint16_t output_y,
                            const uint32_t offset_y);
/**
 * @brief int8/uint8 concatenation function to be used for concatenating N-tensors along the Z axis
 *        This function should be called for each input tensor to concatenate. The argument offset_z
 *        will be used to store the input tensor in the correct position in the output tensor, i.e.
 *
 *            offset_z = 0;
 *            for (i = 0; i < num_input_tensors; ++i)
 *            {
 *                arm_concatenation_s8_z(&input[i], ..., &output, ..., ..., offset_z);
 *                offset_z += input_z[i];
 *            }
 *
 *        This function assumes that the output tensor has:
 *        -# The same width as the input tensor
 *        -# The same height as the input tensor
 *        -# The same batch size as the input tensor
 *
 *        Unless specified otherwise, arguments are mandatory.
 *
 * @note This function is data-layout independent and can be used to concatenate either int8 or uint8 tensors
 *       because it does not involve any arithmetic operation
 *
 * @param[in]  input    Pointer to input tensor. Input tensor must not overlap with the output tensor.
 * @param[in]  input_x  Width of input tensor
 * @param[in]  input_y  Height of input tensor
 * @param[in]  input_z  Channels in input tensor
 * @param[in]  input_w  Batch size in input tensor
 * @param[out] output   Pointer to output tensor. Expected to be at least
 *                      (input_x * input_y * input_z * input_w) + offset_z
 *                      bytes.
 * @param[in]  output_z Channels in output tensor
 * @param[in]  offset_z The offset on the Z axis to start concatenating the input tensor
 *                      It is the user's responsibility to provide the correct value
 *
 * <b> Input constraints</b>
 * offset_z is less than output_z
 *
 */
void arm_concatenation_s8_z(const int8_t *input,
                            const uint16_t input_x,
                            const uint16_t input_y,
                            const uint16_t input_z,
                            const uint16_t input_w,
                            int8_t *output,
                            const uint16_t output_z,
                            const uint32_t offset_z);

/**
 * @brief int8/uint8 concatenation function to be used for concatenating N-tensors along the W axis (Batch size)
 *        This function should be called for each input tensor to concatenate. The argument offset_w
 *        will be used to store the input tensor in the correct position in the output tensor
 *
 *        i.e.    offset_w = 0
 *                for (i = 0; i < num_input_tensors; ++i)
 *                {
 *                    arm_concatenation_s8_w(&input[i], ..., &output, ..., ..., offset_w)
 *                    offset_w += input_w[i]
 *                }
 *
 *        This function assumes that the output tensor has:
 *        -# The same width as the input tensor
 *        -# The same height as the input tensor
 *        -# The same number of channels as the input tensor
 *
 *        Unless specified otherwise, arguments are mandatory.
 *
 * @note  This function is data-layout independent and can be used to concatenate either int8 or uint8
 *        tensors because it does not involve any arithmetic operation
 *
 * @param[in]  input    Pointer to input tensor
 * @param[in]  input_x  Width of input tensor
 * @param[in]  input_y  Height of input tensor
 * @param[in]  input_z  Channels in input tensor
 * @param[in]  input_w  Batch size in input tensor
 * @param[out] output   Pointer to output tensor. Expected to be at least
 *                          input_x * input_y * input_z * input_w
 *                          bytes.
 * @param[in]  offset_w The offset on the W axis to start concatenating the input tensor
 *                      It is the user's responsibility to provide the correct value
 *
 */
void arm_concatenation_s8_w(const int8_t *input,
                            const uint16_t input_x,
                            const uint16_t input_y,
                            const uint16_t input_z,
                            const uint16_t input_w,
                            int8_t *output,
                            const uint32_t offset_w);
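
/*
 * Sketch of the N-tensor loop pattern described in the comments above
 * (hypothetical names and shapes): concatenating three feature maps along the
 * channel (Z) axis, as in a CNN concatenation layer. Each call copies one
 * input into place and the running offset advances by that input's channel
 * count.
 *
 *     const int8_t feat0[4 * 4 * 8]  = {0};
 *     const int8_t feat1[4 * 4 * 16] = {0};
 *     const int8_t feat2[4 * 4 * 8]  = {0};
 *     const int8_t *inputs[3] = {feat0, feat1, feat2};
 *     const uint16_t channels[3] = {8, 16, 8};
 *     int8_t out[4 * 4 * 32]; // output_z = 8 + 16 + 8 = 32
 *     uint32_t offset_z = 0;
 *
 *     for (int i = 0; i < 3; ++i)
 *     {
 *         arm_concatenation_s8_z(inputs[i], 4, 4, channels[i], 1, out, 32, offset_z);
 *         offset_z += channels[i];
 *     }
 */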

/**
 * @defgroup SVDF SVDF Layer Functions
 *
 */

/**
 * @brief s8 SVDF function with 8 bit state tensor and 8 bit time weights
 *
 * @param[in]  input_ctx            Temporary scratch buffer
 * @param[in]  output_ctx           Temporary output scratch buffer
 * @param[in]  svdf_params          SVDF Parameters
 *                                  Range of svdf_params->input_offset  : [-128, 127]
 *                                  Range of svdf_params->output_offset : [-128, 127]
 * @param[in]  input_quant_params   Input quantization parameters
 * @param[in]  output_quant_params  Output quantization parameters
 * @param[in]  input_dims           Input tensor dimensions
 * @param[in]  input_data           Pointer to input tensor
 * @param[in]  state_dims           State tensor dimensions
 * @param[in]  state_data           Pointer to state tensor
 * @param[in]  weights_feature_dims Weights (feature) tensor dimensions
 * @param[in]  weights_feature_data Pointer to the weights (feature) tensor
 * @param[in]  weights_time_dims    Weights (time) tensor dimensions
 * @param[in]  weights_time_data    Pointer to the weights (time) tensor
 * @param[in]  bias_dims            Bias tensor dimensions
 * @param[in]  bias_data            Pointer to bias tensor
 * @param[in]  output_dims          Output tensor dimensions
 * @param[out] output_data          Pointer to the output tensor
 *
 * @return     The function returns <code>ARM_MATH_SUCCESS</code>
 *
 * @details
 *    1. Supported framework: TensorFlow Lite Micro
 *    2. q7 is used as the data type even though the data is s8, to be consistent with existing APIs.
 *
 */
arm_status arm_svdf_s8(const cmsis_nn_context *input_ctx,
                       const cmsis_nn_context *output_ctx,
                       const cmsis_nn_svdf_params *svdf_params,
                       const cmsis_nn_per_tensor_quant_params *input_quant_params,
                       const cmsis_nn_per_tensor_quant_params *output_quant_params,
                       const cmsis_nn_dims *input_dims,
                       const q7_t *input_data,
                       const cmsis_nn_dims *state_dims,
                       q7_t *state_data,
                       const cmsis_nn_dims *weights_feature_dims,
                       const q7_t *weights_feature_data,
                       const cmsis_nn_dims *weights_time_dims,
                       const q7_t *weights_time_data,
                       const cmsis_nn_dims *bias_dims,
                       const q31_t *bias_data,
                       const cmsis_nn_dims *output_dims,
                       q7_t *output_data);
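
/*
 * Call sketch (illustrative, not a definitive reference): wiring up the
 * arguments of arm_svdf_s8. Struct field names are assumed to match the
 * cmsis_nn_* type definitions used by this library; the scratch-buffer sizes,
 * activation ranges, quantization constants and all identifiers in capitals
 * are hypothetical placeholders for this sketch.
 *
 *     int32_t scratch_in[IN_SCRATCH_WORDS];
 *     int32_t scratch_out[OUT_SCRATCH_WORDS];
 *     cmsis_nn_context input_ctx  = {.buf = scratch_in,  .size = sizeof(scratch_in)};
 *     cmsis_nn_context output_ctx = {.buf = scratch_out, .size = sizeof(scratch_out)};
 *
 *     cmsis_nn_svdf_params svdf_params = {
 *         .rank = 1,
 *         .input_offset = 0,                                  // must be in [-128, 127]
 *         .output_offset = 0,                                 // must be in [-128, 127]
 *         .input_activation = {.min = -32768, .max = 32767},  // assumed clamp ranges
 *         .output_activation = {.min = -128, .max = 127},
 *     };
 *     cmsis_nn_per_tensor_quant_params in_quant  = {.multiplier = IN_MULT,  .shift = IN_SHIFT};
 *     cmsis_nn_per_tensor_quant_params out_quant = {.multiplier = OUT_MULT, .shift = OUT_SHIFT};
 *
 *     arm_status status = arm_svdf_s8(&input_ctx, &output_ctx, &svdf_params,
 *                                     &in_quant, &out_quant,
 *                                     &input_dims, input_data,
 *                                     &state_dims, state_data,
 *                                     &weights_feature_dims, weights_feature_data,
 *                                     &weights_time_dims, weights_time_data,
 *                                     &bias_dims, bias_data,
 *                                     &output_dims, output_data);
 */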

/**
 * @brief s8 SVDF function with 16 bit state tensor and 16 bit time weights
 *
 * @param[in]  input_ctx            Temporary scratch buffer
 * @param[in]  output_ctx           Temporary output scratch buffer
 * @param[in]  svdf_params          SVDF Parameters
 *                                  Range of svdf_params->input_offset  : [-128, 127]
 *                                  Range of svdf_params->output_offset : [-128, 127]
 * @param[in]  input_quant_params   Input quantization parameters
 * @param[in]  output_quant_params  Output quantization parameters
 * @param[in]  input_dims           Input tensor dimensions
 * @param[in]  input_data           Pointer to input tensor
 * @param[in]  state_dims           State tensor dimensions
 * @param[in]  state_data           Pointer to state tensor
 * @param[in]  weights_feature_dims Weights (feature) tensor dimensions
 * @param[in]  weights_feature_data Pointer to the weights (feature) tensor
 * @param[in]  weights_time_dims    Weights (time) tensor dimensions
 * @param[in]  weights_time_data    Pointer to the weights (time) tensor
 * @param[in]  bias_dims            Bias tensor dimensions
 * @param[in]  bias_data            Pointer to bias tensor
 * @param[in]  output_dims          Output tensor dimensions
 * @param[out] output_data          Pointer to the output tensor
 *
 * @return     The function returns <code>ARM_MATH_SUCCESS</code>
 *
 * @details
 *    1. Supported framework: TensorFlow Lite Micro
 *    2. q7 is used as the data type even though the data is s8, to be consistent with existing APIs.
 *
 */
arm_status arm_svdf_state_s16_s8(const cmsis_nn_context *input_ctx,
                                 const cmsis_nn_context *output_ctx,
                                 const cmsis_nn_svdf_params *svdf_params,
                                 const cmsis_nn_per_tensor_quant_params *input_quant_params,
                                 const cmsis_nn_per_tensor_quant_params *output_quant_params,
                                 const cmsis_nn_dims *input_dims,
                                 const q7_t *input_data,
                                 const cmsis_nn_dims *state_dims,
                                 q15_t *state_data,
                                 const cmsis_nn_dims *weights_feature_dims,
                                 const q7_t *weights_feature_data,
                                 const cmsis_nn_dims *weights_time_dims,
                                 const q15_t *weights_time_data,
                                 const cmsis_nn_dims *bias_dims,
                                 const q31_t *bias_data,
                                 const cmsis_nn_dims *output_dims,
                                 q7_t *output_data);
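
/*
 * Note (illustrative): the call pattern matches arm_svdf_s8 above; only the
 * state tensor and the time weights move to 16-bit storage. The sizes below
 * are hypothetical placeholders.
 *
 *     q15_t state_data[STATE_SIZE] = {0};                    // 16-bit internal state
 *     static const q15_t weights_time_data[TIME_SIZE] = {0}; // 16-bit time weights
 */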

#ifdef __cplusplus
}
#endif

#endif