  1. /*
  2. * SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
  3. *
  4. * SPDX-License-Identifier: Apache-2.0
  5. *
  6. * Licensed under the Apache License, Version 2.0 (the License); you may
  7. * not use this file except in compliance with the License.
  8. * You may obtain a copy of the License at
  9. *
  10. * www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  14. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. /* ----------------------------------------------------------------------
  19. * Project: CMSIS NN Library
  20. * Title: arm_nnfunctions.h
  21. * Description: Public header file for CMSIS NN Library
  22. *
  23. * $Date: 20 February 2024
  24. * $Revision: V.14.0.0
  25. *
  26. * Target : Arm(R) M-Profile Architecture
  27. * -------------------------------------------------------------------- */
  28. /**
  29. * @defgroup Public Public
  30. * A collection of functions to perform basic operations for neural network layers. Functions with a _s8 suffix support
  31. * TensorFlow Lite framework.
  32. */
  33. #ifndef ARM_NNFUNCTIONS_H
  34. #define ARM_NNFUNCTIONS_H
  35. #include "arm_nn_math_types.h"
  36. #include "arm_nn_types.h"
  37. #define USE_INTRINSIC
  38. #ifdef __cplusplus
  39. extern "C" {
  40. #endif
  41. /**
  42. * @defgroup NNConv Convolution Functions
  43. *
  44. * Collection of convolution, depthwise convolution functions and their variants.
  45. *
  46. * The convolution is implemented in 2 steps: im2col and General Matrix Multiplication(GEMM)
  47. *
  48. * im2col is a process of converting each patch of image data into
  49. * a column. After im2col, the convolution is computed as matrix-matrix
  50. * multiplication.
  51. *
  52. * To reduce the memory footprint, the im2col is performed partially.
  53. * Each iteration, only a few column (i.e., patches) are generated followed
  54. * by GEMM.
  55. *
  56. */
  57. /**
  58. * @brief s4 convolution layer wrapper function with the main purpose to call the optimal kernel available in
  59. * cmsis-nn to perform the convolution.
  60. *
  61. * @param[in, out] ctx Function context that contains the additional buffer if required by the function.
  62. * arm_convolve_wrapper_s4_get_buffer_size will return the buffer_size if required.
  63. * The caller is expected to clear the buffer, if applicable, for security reasons.
  64. * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...).
  65. * Range of conv_params->input_offset : [-127, 128]
  66. * Range of conv_params->output_offset : [-128, 127]
  67. * @param[in] quant_params Per-channel quantization info.
  68. * It contains the multiplier and shift values to be applied to each output channel
  69. * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
  70. * @param[in] input_data Input (activation) data pointer. Data type: int8
  71. * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the
  72. * spatial filter dimensions
  73. * @param[in] filter_data Filter data pointer. Data type: int8 packed with 2x int4
  74. * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT]
  75. * @param[in] bias_data Bias data pointer. Data type: int32
  76. * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT]
  77. * @param[out] output_data Output data pointer. Data type: int8
  78. *
  79. * @return The function returns either
  80. * <code>ARM_CMSIS_NN_ARG_ERROR</code> if argument constraints fail. or,
  81. * <code>ARM_CMSIS_NN_SUCCESS</code> on successful completion.
  82. *
  83. */
  84. arm_cmsis_nn_status arm_convolve_wrapper_s4(const cmsis_nn_context *ctx,
  85. const cmsis_nn_conv_params *conv_params,
  86. const cmsis_nn_per_channel_quant_params *quant_params,
  87. const cmsis_nn_dims *input_dims,
  88. const int8_t *input_data,
  89. const cmsis_nn_dims *filter_dims,
  90. const int8_t *filter_data,
  91. const cmsis_nn_dims *bias_dims,
  92. const int32_t *bias_data,
  93. const cmsis_nn_dims *output_dims,
  94. int8_t *output_data);
  95. /**
  96. * @brief Get the required buffer size for arm_convolve_wrapper_s4
  97. *
  98. * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...).
  99. * Range of conv_params->input_offset : [-127, 128]
  100. * Range of conv_params->output_offset : [-128, 127]
  101. * @param[in] input_dims Input (activation) dimensions. Format: [N, H, W, C_IN]
  102. * @param[in] filter_dims Filter dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial
  103. * filter dimensions
  104. * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT]
  105. *
  106. * @return The function returns required buffer size(bytes)
  107. *
  108. */
  109. int32_t arm_convolve_wrapper_s4_get_buffer_size(const cmsis_nn_conv_params *conv_params,
  110. const cmsis_nn_dims *input_dims,
  111. const cmsis_nn_dims *filter_dims,
  112. const cmsis_nn_dims *output_dims);
  113. /**
  114. * @brief Get the required buffer size for arm_convolve_wrapper_s4 for Arm(R) Helium Architecture case.
  115. * Refer to arm_convolve_wrapper_s4_get_buffer_size() for function argument details.
  116. *
  117. * @note Intended for compilation on Host. If compiling for an Arm target, use
  118. * arm_convolve_wrapper_s4_get_buffer_size(). Currently this operator does not have an
  119. * mve implementation, so dsp will be used.
  120. *
  121. */
  122. int32_t arm_convolve_wrapper_s4_get_buffer_size_mve(const cmsis_nn_conv_params *conv_params,
  123. const cmsis_nn_dims *input_dims,
  124. const cmsis_nn_dims *filter_dims,
  125. const cmsis_nn_dims *output_dims);
  126. /**
  127. * @brief Get the required buffer size for arm_convolve_wrapper_s4 for processors with DSP extension.
  128. * Refer to arm_convolve_wrapper_s4_get_buffer_size() for function argument details.
  129. *
  130. * @note Intended for compilation on Host. If compiling for an Arm target, use
  131. * arm_convolve_wrapper_s4_get_buffer_size().
  132. *
  133. */
  134. int32_t arm_convolve_wrapper_s4_get_buffer_size_dsp(const cmsis_nn_conv_params *conv_params,
  135. const cmsis_nn_dims *input_dims,
  136. const cmsis_nn_dims *filter_dims,
  137. const cmsis_nn_dims *output_dims);
  138. /**
  139. * @brief s8 convolution layer wrapper function with the main purpose to call the optimal kernel available in
  140. * cmsis-nn to perform the convolution.
  141. *
  142. * @param[in, out] ctx Function context that contains the additional buffer if required by the function.
  143. * arm_convolve_wrapper_s8_get_buffer_size will return the buffer_size if required.
  144. * The caller is expected to clear the buffer, if applicable, for security reasons.
  145. * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...).
  146. * Range of conv_params->input_offset : [-127, 128]
  147. * Range of conv_params->output_offset : [-128, 127]
  148. * @param[in] quant_params Per-channel quantization info.
  149. * It contains the multiplier and shift values to be applied to each output channel
  150. * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
  151. * @param[in] input_data Input (activation) data pointer. Data type: int8
  152. * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the
  153. * spatial filter dimensions
  154. * @param[in] filter_data Filter data pointer. Data type: int8
  155. * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT]
  156. * @param[in] bias_data Bias data pointer. Data type: int32
  157. * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT]
  158. * @param[out] output_data Output data pointer. Data type: int8
  159. *
  160. * @return The function returns either
  161. * <code>ARM_CMSIS_NN_ARG_ERROR</code> if argument constraints fail. or,
  162. * <code>ARM_CMSIS_NN_SUCCESS</code> on successful completion.
  163. *
  164. */
  165. arm_cmsis_nn_status arm_convolve_wrapper_s8(const cmsis_nn_context *ctx,
  166. const cmsis_nn_conv_params *conv_params,
  167. const cmsis_nn_per_channel_quant_params *quant_params,
  168. const cmsis_nn_dims *input_dims,
  169. const int8_t *input_data,
  170. const cmsis_nn_dims *filter_dims,
  171. const int8_t *filter_data,
  172. const cmsis_nn_dims *bias_dims,
  173. const int32_t *bias_data,
  174. const cmsis_nn_dims *output_dims,
  175. int8_t *output_data);
  176. /**
  177. * @brief Get the required buffer size for arm_convolve_wrapper_s8
  178. *
  179. * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...).
  180. * Range of conv_params->input_offset : [-127, 128]
  181. * Range of conv_params->output_offset : [-128, 127]
  182. * @param[in] input_dims Input (activation) dimensions. Format: [N, H, W, C_IN]
  183. * @param[in] filter_dims Filter dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial
  184. * filter dimensions
  185. * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT]
  186. *
  187. * @return The function returns required buffer size(bytes)
  188. *
  189. */
  190. int32_t arm_convolve_wrapper_s8_get_buffer_size(const cmsis_nn_conv_params *conv_params,
  191. const cmsis_nn_dims *input_dims,
  192. const cmsis_nn_dims *filter_dims,
  193. const cmsis_nn_dims *output_dims);
  194. /**
  195. * @brief Get the required buffer size for arm_convolve_wrapper_s8 for Arm(R) Helium Architecture case.
  196. * Refer to arm_convolve_wrapper_s8_get_buffer_size() for function argument details.
  197. *
  198. * @note Intended for compilation on Host. If compiling for an Arm target, use
  199. * arm_convolve_wrapper_s8_get_buffer_size().
  200. *
  201. */
  202. int32_t arm_convolve_wrapper_s8_get_buffer_size_mve(const cmsis_nn_conv_params *conv_params,
  203. const cmsis_nn_dims *input_dims,
  204. const cmsis_nn_dims *filter_dims,
  205. const cmsis_nn_dims *output_dims);
  206. /**
  207. * @brief Get the required buffer size for arm_convolve_wrapper_s8 for processors with DSP extension.
  208. * Refer to arm_convolve_wrapper_s8_get_buffer_size() for function argument details.
  209. *
  210. * @note Intended for compilation on Host. If compiling for an Arm target, use
  211. * arm_convolve_wrapper_s8_get_buffer_size().
  212. *
  213. */
  214. int32_t arm_convolve_wrapper_s8_get_buffer_size_dsp(const cmsis_nn_conv_params *conv_params,
  215. const cmsis_nn_dims *input_dims,
  216. const cmsis_nn_dims *filter_dims,
  217. const cmsis_nn_dims *output_dims);
  218. /**
  219. * @brief s16 convolution layer wrapper function with the main purpose to call the optimal kernel available in
  220. * cmsis-nn to perform the convolution.
  221. *
  222. * @param[in, out] ctx Function context that contains the additional buffer if required by the function.
  223. * arm_convolve_wrapper_s16_get_buffer_size will return the buffer_size if required.
  224. * The caller is expected to clear the buffer, if applicable, for security reasons.
  225. * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...).
  226. * conv_params->input_offset : Not used
  227. * conv_params->output_offset : Not used
  228. * @param[in] quant_params Per-channel quantization info.
  229. * It contains the multiplier and shift values to be applied to each output channel
  230. * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
  231. * @param[in] input_data Input (activation) data pointer. Data type: int16
  232. * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the
  233. * spatial filter dimensions
  234. * @param[in] filter_data Filter data pointer. Data type: int8
  235. * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT]
  236. * @param[in] bias_data Bias data pointer. Data type: int64
  237. * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT]
  238. * @param[out] output_data Output data pointer. Data type: int16
  239. *
  240. * @return The function returns either
  241. * <code>ARM_CMSIS_NN_ARG_ERROR</code> if argument constraints fail. or,
  242. * <code>ARM_CMSIS_NN_SUCCESS</code> on successful completion.
  243. *
  244. */
  245. arm_cmsis_nn_status arm_convolve_wrapper_s16(const cmsis_nn_context *ctx,
  246. const cmsis_nn_conv_params *conv_params,
  247. const cmsis_nn_per_channel_quant_params *quant_params,
  248. const cmsis_nn_dims *input_dims,
  249. const int16_t *input_data,
  250. const cmsis_nn_dims *filter_dims,
  251. const int8_t *filter_data,
  252. const cmsis_nn_dims *bias_dims,
  253. const int64_t *bias_data,
  254. const cmsis_nn_dims *output_dims,
  255. int16_t *output_data);
  256. /**
  257. * @brief Get the required buffer size for arm_convolve_wrapper_s16.
  258. *
  259. * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...).
  260. * conv_params->input_offset : Not used
  261. * conv_params->output_offset : Not used
  262. * @param[in] input_dims Input (activation) dimensions. Format: [N, H, W, C_IN]
  263. * @param[in] filter_dims Filter dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial
  264. * filter dimensions
  265. * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT]
  266. *
  267. * @return The function returns required buffer size(bytes)
  268. *
  269. */
  270. int32_t arm_convolve_wrapper_s16_get_buffer_size(const cmsis_nn_conv_params *conv_params,
  271. const cmsis_nn_dims *input_dims,
  272. const cmsis_nn_dims *filter_dims,
  273. const cmsis_nn_dims *output_dims);
  274. /**
  275. * @brief Get the required buffer size for arm_convolve_wrapper_s16 for processors with DSP extension.
  276. * Refer to arm_convolve_wrapper_s16_get_buffer_size() for function argument details.
  277. *
  278. * @note Intended for compilation on Host. If compiling for an Arm target, use
  279. * arm_convolve_wrapper_s16_get_buffer_size().
  280. *
  281. */
  282. int32_t arm_convolve_wrapper_s16_get_buffer_size_dsp(const cmsis_nn_conv_params *conv_params,
  283. const cmsis_nn_dims *input_dims,
  284. const cmsis_nn_dims *filter_dims,
  285. const cmsis_nn_dims *output_dims);
  286. /**
  287. * @brief Get the required buffer size for arm_convolve_wrapper_s16 for Arm(R) Helium Architecture case.
  288. * Refer to arm_convolve_wrapper_s16_get_buffer_size() for function argument details.
  289. *
  290. * @note Intended for compilation on Host. If compiling for an Arm target, use
  291. * arm_convolve_wrapper_s16_get_buffer_size().
  292. *
  293. */
  294. int32_t arm_convolve_wrapper_s16_get_buffer_size_mve(const cmsis_nn_conv_params *conv_params,
  295. const cmsis_nn_dims *input_dims,
  296. const cmsis_nn_dims *filter_dims,
  297. const cmsis_nn_dims *output_dims);
  298. /**
  299. * @brief Basic s4 convolution function
  300. * @param[in, out] ctx Function context that contains the additional buffer if required by the function.
  301. * arm_convolve_s4_get_buffer_size will return the buffer_size if required.
  302. * The caller is expected to clear the buffer, if applicable, for security reasons.
  303. * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...).
  304. * Range of conv_params->input_offset : [-127, 128]
  305. * Range of conv_params->output_offset : [-128, 127]
  306. * @param[in] quant_params Per-channel quantization info.
  307. * It contains the multiplier and shift values to be applied to each output channel
  308. * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
  309. * @param[in] input_data Input (activation) data pointer. Data type: int8
  310. * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the
  311. * spatial filter dimensions
  312. * @param[in] filter_data Packed Filter data pointer. Data type: int8 packed with 2x int4
  313. * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT]
  314. * @param[in] bias_data Optional bias data pointer. Data type: int32
  315. * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT]
  316. * @param[out] output_data Output data pointer. Data type: int8
  317. * @return The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
  318. *
  319. * @details
  320. * 1. Supported framework: TensorFlow Lite micro
  321. * 2. Additional memory is required for optimization. Refer to argument 'ctx' for details.
  322. *
  323. */
  324. arm_cmsis_nn_status arm_convolve_s4(const cmsis_nn_context *ctx,
  325. const cmsis_nn_conv_params *conv_params,
  326. const cmsis_nn_per_channel_quant_params *quant_params,
  327. const cmsis_nn_dims *input_dims,
  328. const int8_t *input_data,
  329. const cmsis_nn_dims *filter_dims,
  330. const int8_t *filter_data,
  331. const cmsis_nn_dims *bias_dims,
  332. const int32_t *bias_data,
  333. const cmsis_nn_dims *output_dims,
  334. int8_t *output_data);
  335. /**
  336. * @brief Basic s8 convolution function
  337. * @param[in, out] ctx Function context that contains the additional buffer if required by the function.
  338. * arm_convolve_s8_get_buffer_size will return the buffer_size if required.
  339. * The caller is expected to clear the buffer, if applicable, for security reasons.
  340. * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...).
  341. * Range of conv_params->input_offset : [-127, 128]
  342. * Range of conv_params->output_offset : [-128, 127]
  343. * @param[in] quant_params Per-channel quantization info.
  344. * It contains the multiplier and shift values to be applied to each output channel
  345. * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
  346. * @param[in] input_data Input (activation) data pointer. Data type: int8
  347. * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, CK] where HK, WK and CK are the
  348. * spatial filter dimensions. CK != C_IN is used for grouped convolution, in which
  349. * case the required conditions are C_IN = N * CK and C_OUT = N * M for N groups of
  350. * size M.
  351. * @param[in] filter_data Filter data pointer. Data type: int8
  352. * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT]
  353. * @param[in] bias_data Optional bias data pointer. Data type: int32
  354. * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT]
  355. * @param[out] output_data Output data pointer. Data type: int8
  356. * @return The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
  357. *
  358. * @details
  359. * 1. Supported framework: TensorFlow Lite micro
  360. * 2. Additional memory is required for optimization. Refer to argument 'ctx' for details.
  361. *
  362. */
  363. arm_cmsis_nn_status arm_convolve_s8(const cmsis_nn_context *ctx,
  364. const cmsis_nn_conv_params *conv_params,
  365. const cmsis_nn_per_channel_quant_params *quant_params,
  366. const cmsis_nn_dims *input_dims,
  367. const int8_t *input_data,
  368. const cmsis_nn_dims *filter_dims,
  369. const int8_t *filter_data,
  370. const cmsis_nn_dims *bias_dims,
  371. const int32_t *bias_data,
  372. const cmsis_nn_dims *output_dims,
  373. int8_t *output_data);
  374. /**
  375. * @brief Get the required buffer size for s4 convolution function
  376. *
  377. * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
  378. * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK
  379. * are the spatial filter dimensions
  380. * @return The function returns required buffer size(bytes)
  381. *
  382. */
  383. int32_t arm_convolve_s4_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims);
  384. /**
  385. * @brief Get the required buffer size for s8 convolution function
  386. *
  387. * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
  388. * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK
  389. * are the spatial filter dimensions
  390. * @return The function returns required buffer size(bytes)
  391. *
  392. */
  393. int32_t arm_convolve_s8_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims);
  394. /**
  395. * @brief Basic s8 transpose convolution function
  396. * @param[in, out] ctx Function context that contains the additional buffer if required by the
  397. * function.
  398. * arm_transpose_conv_s8_get_buffer_size will return the buffer_size if required.
  399. * The caller is expected to clear the buffer, if applicable, for security
  400. * reasons.
  401. * @param[in, out] output_ctx Temporary scratch buffer.
  402. * The size required size is: output width * output height * output channel * 4
  403. * The caller is expected to clear the buffer, if applicable, for security
  404. * reasons.
  405. * @param[in] transpose_conv_params Convolution parameters (e.g. strides, dilations, pads,...).
  406. * Range of transpose_conv_params->input_offset : [-127, 128]
  407. * Range of transpose_conv_params->output_offset : [-128, 127]
  408. * @param[in] quant_params Per-channel quantization info.
  409. * It contains the multiplier and shift values to be applied to each out channel.
  410. * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
  411. * @param[in] input_data Input (activation) data pointer. Data type: int8
  412. * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the
  413. * spatial filter dimensions
  414. * @param[in] filter_data Filter data pointer. Data type: int8
  415. * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT]
  416. * @param[in] bias_data Optional bias data pointer. Data type: int32
  417. * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT]
  418. * @param[out] output_data Output data pointer. Data type: int8
  419. * @return The function returns either
  420. * <code>ARM_CMSIS_NN_ARG_ERROR</code> if argument constraints fail. or,
  421. * <code>ARM_CMSIS_NN_SUCCESS</code> on successful completion.
  422. *
  423. * @details
  424. * 1. Supported framework: TensorFlow Lite micro
  425. * 2. Additional memory is required for optimization. Refer to arguments 'ctx' and 'output_ctx' for details.
  426. *
  427. */
  428. arm_cmsis_nn_status arm_transpose_conv_s8(const cmsis_nn_context *ctx,
  429. const cmsis_nn_context *output_ctx,
  430. const cmsis_nn_transpose_conv_params *transpose_conv_params,
  431. const cmsis_nn_per_channel_quant_params *quant_params,
  432. const cmsis_nn_dims *input_dims,
  433. const int8_t *input_data,
  434. const cmsis_nn_dims *filter_dims,
  435. const int8_t *filter_data,
  436. const cmsis_nn_dims *bias_dims,
  437. const int32_t *bias_data,
  438. const cmsis_nn_dims *output_dims,
  439. int8_t *output_data);
  440. /**
  441. * @brief Get the required buffer size for s8 transpose conv function
  442. *
  443. * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
  444. * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK
  445. * are the spatial filter dimensions
  446. * @param[in] out_dims Output tensor dimensions. Format: [N, H, W, C_OUT]
  447. * @return The function returns required buffer size(bytes)
  448. *
  449. */
  450. int32_t arm_transpose_conv_s8_get_buffer_size(const cmsis_nn_dims *input_dims,
  451. const cmsis_nn_dims *filter_dims,
  452. const cmsis_nn_dims *out_dims);
  453. /**
  454. * @brief Get size of additional buffer required by arm_transpose_conv_s8() for processors with DSP extension.
  455. * Refer to arm_transpose_conv_s8_get_buffer_size() for function argument details.
  456. *
  457. * @note Intended for compilation on Host. If compiling for an Arm target, use
  458. * arm_transpose_conv_s8_get_buffer_size().
  459. *
  460. */
  461. int32_t arm_transpose_conv_s8_get_buffer_size_dsp(const cmsis_nn_dims *input_dims,
  462. const cmsis_nn_dims *filter_dims,
  463. const cmsis_nn_dims *out_dims);
  464. /**
  465. * @brief Get size of additional buffer required by arm_transpose_conv_s8() for Arm(R) Helium Architecture case.
  466. * Refer to arm_transpose_conv_s8_get_buffer_size() for function argument details.
  467. *
  468. * @note Intended for compilation on Host. If compiling for an Arm target, use
  469. * arm_transpose_conv_s8_get_buffer_size().
  470. *
  471. */
  472. int32_t arm_transpose_conv_s8_get_buffer_size_mve(const cmsis_nn_dims *input_dims,
  473. const cmsis_nn_dims *filter_dims,
  474. const cmsis_nn_dims *out_dims);
  475. /**
  476. * @brief Basic s16 convolution function
  477. * @param[in, out] ctx Function context that contains the additional buffer if required by the function.
  478. * arm_convolve_s16_get_buffer_size will return the buffer_size if required.
  479. * The caller is expected to clear the buffer, if applicable, for security reasons.
  480. * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...).
  481. * conv_params->input_offset : Not used
  482. * conv_params->output_offset : Not used
  483. * @param[in] quant_params Per-channel quantization info.
  484. * It contains the multiplier and shift values to be applied to each output channel
  485. * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
  486. * @param[in] input_data Input (activation) data pointer. Data type: int16
  487. * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the
  488. * spatial filter dimensions
  489. * @param[in] filter_data Filter data pointer. Data type: int8
  490. * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT]
  491. * @param[in] bias_data Optional bias data pointer. Data type: int64
  492. * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT]
  493. * @param[out] output_data Output data pointer. Data type: int16
  494. * @return The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
  495. *
  496. * @details
  497. * 1. Supported framework: TensorFlow Lite micro
  498. * 2. Additional memory is required for optimization. Refer to argument 'ctx' for details.
  499. *
  500. */
  501. arm_cmsis_nn_status arm_convolve_s16(const cmsis_nn_context *ctx,
  502. const cmsis_nn_conv_params *conv_params,
  503. const cmsis_nn_per_channel_quant_params *quant_params,
  504. const cmsis_nn_dims *input_dims,
  505. const int16_t *input_data,
  506. const cmsis_nn_dims *filter_dims,
  507. const int8_t *filter_data,
  508. const cmsis_nn_dims *bias_dims,
  509. const int64_t *bias_data,
  510. const cmsis_nn_dims *output_dims,
  511. int16_t *output_data);
  512. /**
  513. * @brief Optimized s16 convolution function
  514. * @param[in, out] ctx Function context that contains the additional buffer if required by the function.
  515. * arm_convolve_fast_s16_get_buffer_size will return the buffer_size if required.
  516. * The caller is expected to clear the buffer, if applicable, for security reasons.
  517. * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...).
  518. * conv_params->input_offset : Not used
  519. * conv_params->output_offset : Not used
  520. * @param[in] quant_params Per-channel quantization info.
  521. * It contains the multiplier and shift values to be applied to each output channel
  522. * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
  523. * @param[in] input_data Input (activation) data pointer. Data type: int16
  524. * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the
  525. * spatial filter dimensions. (filter_dims->w * filter_dims->h * input_dims->c) must not
526. * exceed 512
  527. * @param[in] filter_data Filter data pointer. Data type: int8
  528. * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT]
  529. * @param[in] bias_data Optional bias data pointer. Data type: int64
  530. * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT]
  531. * @param[out] output_data Output data pointer. Data type: int16
  532. * @return The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
  533. *
  534. * @details
  535. * 1. Supported framework: TensorFlow Lite micro
  536. * 2. Additional memory is required for optimization. Refer to argument 'ctx' for details.
  537. * 3. Implementation supports kernel volumes (filter width * filter height * input channels) < 512.
  538. *
  539. */
  540. arm_cmsis_nn_status arm_convolve_fast_s16(const cmsis_nn_context *ctx,
  541. const cmsis_nn_conv_params *conv_params,
  542. const cmsis_nn_per_channel_quant_params *quant_params,
  543. const cmsis_nn_dims *input_dims,
  544. const int16_t *input_data,
  545. const cmsis_nn_dims *filter_dims,
  546. const int8_t *filter_data,
  547. const cmsis_nn_dims *bias_dims,
  548. const int64_t *bias_data,
  549. const cmsis_nn_dims *output_dims,
  550. int16_t *output_data);
  551. /**
  552. * @brief Get the required buffer size for s16 convolution function
  553. *
  554. * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
  555. * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK
  556. * are the spatial filter dimensions
  557. * @return The function returns required buffer size(bytes)
  558. *
  559. */
  560. int32_t arm_convolve_s16_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims);
  561. /**
  562. * @brief Get the required buffer size for fast s16 convolution function
  563. *
  564. * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
  565. * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK
  566. * are the spatial filter dimensions
  567. * @return The function returns required buffer size(bytes)
  568. *
  569. */
  570. int32_t arm_convolve_fast_s16_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims);
571. /**
572. * @brief Fast s4 version for 1x1 convolution (non-square shape)
573. *
574. * @param[in, out] ctx Function context that contains the additional buffer if required by the function.
575. * arm_convolve_1x1_s4_fast_get_buffer_size will return the buffer_size if required.
576. * The caller is expected to clear the buffer, if applicable, for security reasons.
577. * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...).
578. * Range of conv_params->input_offset : [-127, 128]
579. * Range of conv_params->output_offset : [-128, 127]
580. * @param[in] quant_params Per-channel quantization info.
581. * It contains the multiplier and shift values to be applied to each output channel
582. * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
583. * @param[in] input_data Input (activation) data pointer. Data type: int8
584. * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, 1, 1, C_IN]
585. * @param[in] filter_data Filter data pointer. Data type: int8 packed with 2x int4
586. * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT]
587. * @param[in] bias_data Optional bias data pointer. Data type: int32
588. * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT]
589. * @param[out] output_data Output data pointer. Data type: int8
590. *
591. * @return The function returns either
592. * <code>ARM_CMSIS_NN_ARG_ERROR</code> if argument constraints fail. or,
593. * <code>ARM_CMSIS_NN_SUCCESS</code> on successful completion.
594. *
595. * @details
596. * - Supported framework : TensorFlow Lite Micro
597. * - The following constraints on the arguments apply
598. * -# conv_params->padding.w = conv_params->padding.h = 0
599. * -# conv_params->stride.w = conv_params->stride.h = 1
600. *
601. */
602. arm_cmsis_nn_status arm_convolve_1x1_s4_fast(const cmsis_nn_context *ctx,
603. const cmsis_nn_conv_params *conv_params,
604. const cmsis_nn_per_channel_quant_params *quant_params,
605. const cmsis_nn_dims *input_dims,
606. const int8_t *input_data,
607. const cmsis_nn_dims *filter_dims,
608. const int8_t *filter_data,
609. const cmsis_nn_dims *bias_dims,
610. const int32_t *bias_data,
611. const cmsis_nn_dims *output_dims,
612. int8_t *output_data);
  613. /**
  614. * @brief s4 version for 1x1 convolution with support for non-unity stride values
  615. *
  616. * @param[in, out] ctx Function context that contains the additional buffer if required by the function.
  617. * None is required by this function.
  618. * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...).
  619. * Range of conv_params->input_offset : [-127, 128]
  620. * Range of conv_params->output_offset : [-128, 127]
  621. * @param[in] quant_params Per-channel quantization info.
  622. * It contains the multiplier and shift values to be applied to each output channel
  623. * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
  624. * @param[in] input_data Input (activation) data pointer. Data type: int8
  625. * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, 1, 1, C_IN]
  626. * @param[in] filter_data Filter data pointer. Data type: int8 packed with 2x int4
  627. * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT]
  628. * @param[in] bias_data Optional bias data pointer. Data type: int32
  629. * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT]
  630. * @param[out] output_data Output data pointer. Data type: int8
  631. *
  632. * @return The function returns either
  633. * <code>ARM_CMSIS_NN_ARG_ERROR</code> if argument constraints fail. or,
  634. * <code>ARM_CMSIS_NN_SUCCESS</code> on successful completion.
  635. * @details
  636. * - Supported framework : TensorFlow Lite Micro
637. * - The following constraints on the arguments apply
  638. * -# conv_params->padding.w = conv_params->padding.h = 0
  639. *
  640. */
  641. arm_cmsis_nn_status arm_convolve_1x1_s4(const cmsis_nn_context *ctx,
  642. const cmsis_nn_conv_params *conv_params,
  643. const cmsis_nn_per_channel_quant_params *quant_params,
  644. const cmsis_nn_dims *input_dims,
  645. const int8_t *input_data,
  646. const cmsis_nn_dims *filter_dims,
  647. const int8_t *filter_data,
  648. const cmsis_nn_dims *bias_dims,
  649. const int32_t *bias_data,
  650. const cmsis_nn_dims *output_dims,
  651. int8_t *output_data);
  652. /**
  653. * @brief Fast s8 version for 1x1 convolution (non-square shape)
  654. *
  655. * @param[in, out] ctx Function context that contains the additional buffer if required by the function.
  656. * arm_convolve_1x1_s8_fast_get_buffer_size will return the buffer_size if required.
  657. * The caller is expected to clear the buffer, if applicable, for security reasons.
  658. * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...).
  659. * Range of conv_params->input_offset : [-127, 128]
  660. * Range of conv_params->output_offset : [-128, 127]
  661. * @param[in] quant_params Per-channel quantization info.
  662. * It contains the multiplier and shift values to be applied to each output channel
  663. * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
  664. * @param[in] input_data Input (activation) data pointer. Data type: int8
  665. * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, 1, 1, C_IN]
  666. * @param[in] filter_data Filter data pointer. Data type: int8
  667. * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT]
  668. * @param[in] bias_data Optional bias data pointer. Data type: int32
  669. * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT]
  670. * @param[out] output_data Output data pointer. Data type: int8
  671. *
  672. * @return The function returns either
  673. * <code>ARM_CMSIS_NN_ARG_ERROR</code> if argument constraints fail. or,
  674. * <code>ARM_CMSIS_NN_SUCCESS</code> on successful completion.
  675. *
  676. * @details
  677. * - Supported framework : TensorFlow Lite Micro
678. * - The following constraints on the arguments apply
  679. * -# conv_params->padding.w = conv_params->padding.h = 0
  680. * -# conv_params->stride.w = conv_params->stride.h = 1
  681. *
  682. */
  683. arm_cmsis_nn_status arm_convolve_1x1_s8_fast(const cmsis_nn_context *ctx,
  684. const cmsis_nn_conv_params *conv_params,
  685. const cmsis_nn_per_channel_quant_params *quant_params,
  686. const cmsis_nn_dims *input_dims,
  687. const int8_t *input_data,
  688. const cmsis_nn_dims *filter_dims,
  689. const int8_t *filter_data,
  690. const cmsis_nn_dims *bias_dims,
  691. const int32_t *bias_data,
  692. const cmsis_nn_dims *output_dims,
  693. int8_t *output_data);
  694. /**
  695. * @brief Get the required buffer size for arm_convolve_1x1_s4_fast
  696. *
  697. * @param[in] input_dims Input (activation) dimensions
  698. * @return The function returns the required buffer size in bytes
  699. *
  700. */
  701. int32_t arm_convolve_1x1_s4_fast_get_buffer_size(const cmsis_nn_dims *input_dims);
  702. /**
  703. * @brief Get the required buffer size for arm_convolve_1x1_s8_fast
  704. *
  705. * @param[in] input_dims Input (activation) dimensions
  706. * @return The function returns the required buffer size in bytes
  707. *
  708. */
  709. int32_t arm_convolve_1x1_s8_fast_get_buffer_size(const cmsis_nn_dims *input_dims);
  710. /**
  711. * @brief s8 version for 1x1 convolution with support for non-unity stride values
  712. *
  713. * @param[in, out] ctx Function context that contains the additional buffer if required by the function.
  714. * None is required by this function.
  715. * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...).
  716. * Range of conv_params->input_offset : [-127, 128]
  717. * Range of conv_params->output_offset : [-128, 127]
  718. * @param[in] quant_params Per-channel quantization info.
  719. * It contains the multiplier and shift values to be applied to each output channel
  720. * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
  721. * @param[in] input_data Input (activation) data pointer. Data type: int8
  722. * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, 1, 1, C_IN]
  723. * @param[in] filter_data Filter data pointer. Data type: int8
  724. * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT]
  725. * @param[in] bias_data Optional bias data pointer. Data type: int32
  726. * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT]
  727. * @param[out] output_data Output data pointer. Data type: int8
  728. *
  729. * @return The function returns either
  730. * <code>ARM_CMSIS_NN_ARG_ERROR</code> if argument constraints fail. or,
  731. * <code>ARM_CMSIS_NN_SUCCESS</code> on successful completion.
  732. * @details
  733. * - Supported framework : TensorFlow Lite Micro
734. * - The following constraints on the arguments apply
  735. * -# conv_params->padding.w = conv_params->padding.h = 0
  736. *
  737. */
  738. arm_cmsis_nn_status arm_convolve_1x1_s8(const cmsis_nn_context *ctx,
  739. const cmsis_nn_conv_params *conv_params,
  740. const cmsis_nn_per_channel_quant_params *quant_params,
  741. const cmsis_nn_dims *input_dims,
  742. const int8_t *input_data,
  743. const cmsis_nn_dims *filter_dims,
  744. const int8_t *filter_data,
  745. const cmsis_nn_dims *bias_dims,
  746. const int32_t *bias_data,
  747. const cmsis_nn_dims *output_dims,
  748. int8_t *output_data);
749. /**
750. * @brief 1xn convolution
751. *
752. * @param[in, out] ctx Function context that contains the additional buffer if required by the function.
753. * arm_convolve_1_x_n_s8_get_buffer_size will return the buffer_size if required
754. * The caller is expected to clear the buffer, if applicable, for security reasons.
755. * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...).
756. * Range of conv_params->input_offset : [-127, 128]
757. * Range of conv_params->output_offset : [-128, 127]
758. * @param[in] quant_params Per-channel quantization info.
759. * It contains the multiplier and shift values to be applied to each output channel
760. * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
761. * @param[in] input_data Input (activation) data pointer. Data type: int8
762. * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, 1, WK, C_IN] where WK is the horizontal
763. * spatial filter dimension
764. * @param[in] filter_data Filter data pointer. Data type: int8
765. * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT]
766. * @param[in] bias_data Optional bias data pointer. Data type: int32
767. * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT]
768. * @param[out] output_data Output data pointer. Data type: int8
769. *
770. * @return The function returns either
771. * <code>ARM_CMSIS_NN_ARG_ERROR</code> if argument constraints fail. or,
772. * <code>ARM_CMSIS_NN_SUCCESS</code> on successful completion.
773. *
774. * @details
775. * - Supported framework : TensorFlow Lite Micro
776. * - The following constraints on the arguments apply
777. * -# input_dims->n equals 1
778. * -# output_dims->w is a multiple of 4
779. * -# Explicit constraints(since it is for 1xN convolution)
780. * -## input_dims->h equals 1
781. * -## output_dims->h equals 1
782. * -## filter_dims->h equals 1
783. *@todo Remove constraint on output_dims->w to make the function generic.
784. *
785. */
786. arm_cmsis_nn_status arm_convolve_1_x_n_s8(const cmsis_nn_context *ctx,
787. const cmsis_nn_conv_params *conv_params,
788. const cmsis_nn_per_channel_quant_params *quant_params,
789. const cmsis_nn_dims *input_dims,
790. const int8_t *input_data,
791. const cmsis_nn_dims *filter_dims,
792. const int8_t *filter_data,
793. const cmsis_nn_dims *bias_dims,
794. const int32_t *bias_data,
795. const cmsis_nn_dims *output_dims,
796. int8_t *output_data);
  797. /**
  798. * @brief Get the required additional buffer size for 1xn convolution
  799. *
  800. * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...).
  801. * Range of conv_params->input_offset : [-127, 128]
  802. * Range of conv_params->output_offset : [-128, 127]
  803. * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
  804. * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, 1, WK, C_IN] where WK is the
  805. * horizontal spatial filter dimension
  806. * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT]
  807. *
  808. * @return The function returns required buffer size(bytes)
  809. *
  810. */
  811. int32_t arm_convolve_1_x_n_s8_get_buffer_size(const cmsis_nn_conv_params *conv_params,
  812. const cmsis_nn_dims *input_dims,
  813. const cmsis_nn_dims *filter_dims,
  814. const cmsis_nn_dims *output_dims);
815. /**
816. * @brief Wrapper function to pick the right optimized s8 depthwise convolution function
817. *
818. * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function
819. * definition file to see if an additional buffer is required.
820. * Optional function {API}_get_buffer_size() provides the buffer
821. * size if required.
822. * The caller is expected to clear the buffer, if applicable, for security reasons.
823. * @param[in] dw_conv_params Depthwise convolution parameters (e.g. strides, dilations, pads,...)
824. * dw_conv_params->dilation is not used.
825. * Range of dw_conv_params->input_offset : [-127, 128]
826. * Range of dw_conv_params->output_offset : [-128, 127]
827. * @param[in] quant_params Per-channel quantization info.
828. * It contains the multiplier and shift values to be applied to each
829. * output channel
830. * @param[in] input_dims Input (activation) tensor dimensions. Format: [H, W, C_IN]
831. * Batch argument N is not used and assumed to be 1.
832. * @param[in] input_data Input (activation) data pointer. Data type: int8
833. * @param[in] filter_dims Filter tensor dimensions. Format: [1, H, W, C_OUT]
834. * @param[in] filter_data Filter data pointer. Data type: int8
835. * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT]
836. * @param[in] bias_data Bias data pointer. Data type: int32
837. * @param[in] output_dims Output tensor dimensions. Format: [1, H, W, C_OUT]
838. * @param[in, out] output_data Output data pointer. Data type: int8
839. * @return The function returns
840. * <code>ARM_CMSIS_NN_SUCCESS</code> - Successful completion.
841. *
842. * @details
843. * - Supported framework: TensorFlow Lite
844. * - Picks one of the following functions
845. * -# arm_depthwise_conv_s8()
846. * -# arm_depthwise_conv_3x3_s8() - Cortex-M CPUs with DSP extension only
847. * -# arm_depthwise_conv_s8_opt()
848. * - Check details of arm_depthwise_conv_s8_opt() for potential data that can be accessed outside of the
849. * boundary.
850. */
851. arm_cmsis_nn_status arm_depthwise_conv_wrapper_s8(const cmsis_nn_context *ctx,
852. const cmsis_nn_dw_conv_params *dw_conv_params,
853. const cmsis_nn_per_channel_quant_params *quant_params,
854. const cmsis_nn_dims *input_dims,
855. const int8_t *input_data,
856. const cmsis_nn_dims *filter_dims,
857. const int8_t *filter_data,
858. const cmsis_nn_dims *bias_dims,
859. const int32_t *bias_data,
860. const cmsis_nn_dims *output_dims,
861. int8_t *output_data);
862. /**
863. * @brief Wrapper function to pick the right optimized s4 depthwise convolution function
864. *
865. * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function
866. * definition file to see if an additional buffer is required.
867. * Optional function {API}_get_buffer_size() provides the buffer
868. * size if required.
869. * The caller is expected to clear the buffer, if applicable, for security reasons.
870. * @param[in] dw_conv_params Depthwise convolution parameters (e.g. strides, dilations, pads,...)
871. * dw_conv_params->dilation is not used.
872. * Range of dw_conv_params->input_offset : [-127, 128]
873. * Range of dw_conv_params->output_offset : [-128, 127]
874. * @param[in] quant_params Per-channel quantization info.
875. * It contains the multiplier and shift values to be applied to each
876. * output channel
877. * @param[in] input_dims Input (activation) tensor dimensions. Format: [H, W, C_IN]
878. * Batch argument N is not used and assumed to be 1.
879. * @param[in] input_data Input (activation) data pointer. Data type: int8
880. * @param[in] filter_dims Filter tensor dimensions. Format: [1, H, W, C_OUT]
881. * @param[in] filter_data Filter data pointer. Data type: int8_t packed 4-bit weights, e.g four sequential
882. * weights [0x1, 0x2, 0x3, 0x4] packed as [0x21, 0x43].
883. * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT]
884. * @param[in] bias_data Bias data pointer. Data type: int32
885. * @param[in] output_dims Output tensor dimensions. Format: [1, H, W, C_OUT]
886. * @param[in, out] output_data Output data pointer. Data type: int8
887. * @return The function returns
888. * <code>ARM_CMSIS_NN_SUCCESS</code> - Successful completion.
889. *
890. * @details
891. * - Supported framework: TensorFlow Lite
892. */
893. arm_cmsis_nn_status arm_depthwise_conv_wrapper_s4(const cmsis_nn_context *ctx,
894. const cmsis_nn_dw_conv_params *dw_conv_params,
895. const cmsis_nn_per_channel_quant_params *quant_params,
896. const cmsis_nn_dims *input_dims,
897. const int8_t *input_data,
898. const cmsis_nn_dims *filter_dims,
899. const int8_t *filter_data,
900. const cmsis_nn_dims *bias_dims,
901. const int32_t *bias_data,
902. const cmsis_nn_dims *output_dims,
903. int8_t *output_data);
904. /**
905. * @brief Get size of additional buffer required by arm_depthwise_conv_wrapper_s8()
906. *
907. * @param[in] dw_conv_params Depthwise convolution parameters (e.g. strides, dilations, pads,...)
908. * Range of dw_conv_params->input_offset : [-127, 128]
909. * Range of dw_conv_params->output_offset : [-128, 127]
910. * @param[in] input_dims Input (activation) tensor dimensions. Format: [H, W, C_IN]
911. * Batch argument N is not used and assumed to be 1.
912. * @param[in] filter_dims Filter tensor dimensions. Format: [1, H, W, C_OUT]
913. * @param[in] output_dims Output tensor dimensions. Format: [1, H, W, C_OUT]
914. * @return Size of additional memory required for optimizations in bytes.
915. *
916. */
917. int32_t arm_depthwise_conv_wrapper_s8_get_buffer_size(const cmsis_nn_dw_conv_params *dw_conv_params,
918. const cmsis_nn_dims *input_dims,
919. const cmsis_nn_dims *filter_dims,
920. const cmsis_nn_dims *output_dims);
  921. /**
  922. * @brief Get size of additional buffer required by arm_depthwise_conv_wrapper_s8() for processors with DSP extension.
  923. * Refer to arm_depthwise_conv_wrapper_s8_get_buffer_size() for function argument details.
  924. *
  925. * @note Intended for compilation on Host. If compiling for an Arm target, use
  926. * arm_depthwise_conv_wrapper_s8_get_buffer_size().
  927. *
  928. */
  929. int32_t arm_depthwise_conv_wrapper_s8_get_buffer_size_dsp(const cmsis_nn_dw_conv_params *dw_conv_params,
  930. const cmsis_nn_dims *input_dims,
  931. const cmsis_nn_dims *filter_dims,
  932. const cmsis_nn_dims *output_dims);
  933. /**
  934. * @brief Get size of additional buffer required by arm_depthwise_conv_wrapper_s8() for Arm(R) Helium Architecture case.
  935. * Refer to arm_depthwise_conv_wrapper_s8_get_buffer_size() for function argument details.
  936. *
  937. * @note Intended for compilation on Host. If compiling for an Arm target, use
  938. * arm_depthwise_conv_wrapper_s8_get_buffer_size().
  939. *
  940. */
  941. int32_t arm_depthwise_conv_wrapper_s8_get_buffer_size_mve(const cmsis_nn_dw_conv_params *dw_conv_params,
  942. const cmsis_nn_dims *input_dims,
  943. const cmsis_nn_dims *filter_dims,
  944. const cmsis_nn_dims *output_dims);
945. /**
946. * @brief Get size of additional buffer required by arm_depthwise_conv_wrapper_s4()
947. *
948. * @param[in] dw_conv_params Depthwise convolution parameters (e.g. strides, dilations, pads,...)
949. * Range of dw_conv_params->input_offset : [-127, 128]
950. * Range of dw_conv_params->output_offset : [-128, 127]
951. * @param[in] input_dims Input (activation) tensor dimensions. Format: [H, W, C_IN]
952. * Batch argument N is not used and assumed to be 1.
953. * @param[in] filter_dims Filter tensor dimensions. Format: [1, H, W, C_OUT]
954. * @param[in] output_dims Output tensor dimensions. Format: [1, H, W, C_OUT]
955. * @return Size of additional memory required for optimizations in bytes.
956. *
957. */
958. int32_t arm_depthwise_conv_wrapper_s4_get_buffer_size(const cmsis_nn_dw_conv_params *dw_conv_params,
959. const cmsis_nn_dims *input_dims,
960. const cmsis_nn_dims *filter_dims,
961. const cmsis_nn_dims *output_dims);
  962. /**
  963. * @brief Get size of additional buffer required by arm_depthwise_conv_wrapper_s4() for processors with DSP extension.
  964. * Refer to arm_depthwise_conv_wrapper_s4_get_buffer_size() for function argument details.
  965. *
  966. * @note Intended for compilation on Host. If compiling for an Arm target, use
  967. * arm_depthwise_conv_wrapper_s4_get_buffer_size().
  968. *
  969. */
  970. int32_t arm_depthwise_conv_wrapper_s4_get_buffer_size_dsp(const cmsis_nn_dw_conv_params *dw_conv_params,
  971. const cmsis_nn_dims *input_dims,
  972. const cmsis_nn_dims *filter_dims,
  973. const cmsis_nn_dims *output_dims);
  974. /**
  975. * @brief Get size of additional buffer required by arm_depthwise_conv_wrapper_s4() for Arm(R) Helium Architecture case.
  976. * Refer to arm_depthwise_conv_wrapper_s4_get_buffer_size() for function argument details.
  977. *
  978. * @note Intended for compilation on Host. If compiling for an Arm target, use
  979. * arm_depthwise_conv_wrapper_s4_get_buffer_size().
  980. *
  981. */
  982. int32_t arm_depthwise_conv_wrapper_s4_get_buffer_size_mve(const cmsis_nn_dw_conv_params *dw_conv_params,
  983. const cmsis_nn_dims *input_dims,
  984. const cmsis_nn_dims *filter_dims,
  985. const cmsis_nn_dims *output_dims);
986. /**
987. * @brief Basic s8 depthwise convolution function that doesn't have any constraints on the input dimensions.
988. *
989. * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function
990. * definition file to see if an additional buffer is required.
991. * Optional function {API}_get_buffer_size() provides the buffer
992. * size if an additional buffer is required.
993. * The caller is expected to clear the buffer, if applicable, for security reasons.
994. * @param[in] dw_conv_params Depthwise convolution parameters (e.g. strides, dilations, pads,...)
995. * dw_conv_params->dilation is not used.
996. * Range of dw_conv_params->input_offset : [-127, 128]
997. * Range of dw_conv_params->output_offset : [-128, 127]
998. * @param[in] quant_params Per-channel quantization info.
999. * It contains the multiplier and shift values to be applied to each
1000. * output channel
1001. * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
1002. * Batch argument N is not used.
1003. * @param[in] input_data Input (activation) data pointer. Data type: int8
1004. * @param[in] filter_dims Filter tensor dimensions. Format: [1, H, W, C_OUT]
1005. * @param[in] filter_data Filter data pointer. Data type: int8
1006. * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT]
1007. * @param[in] bias_data Bias data pointer. Data type: int32
1008. * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT]
1009. * @param[in, out] output_data Output data pointer. Data type: int8
1010. * @return The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
1011. *
1012. * @details
1013. * - Supported framework: TensorFlow Lite
1014. */
1015. arm_cmsis_nn_status arm_depthwise_conv_s8(const cmsis_nn_context *ctx,
1016. const cmsis_nn_dw_conv_params *dw_conv_params,
1017. const cmsis_nn_per_channel_quant_params *quant_params,
1018. const cmsis_nn_dims *input_dims,
1019. const int8_t *input_data,
1020. const cmsis_nn_dims *filter_dims,
1021. const int8_t *filter_data,
1022. const cmsis_nn_dims *bias_dims,
1023. const int32_t *bias_data,
1024. const cmsis_nn_dims *output_dims,
1025. int8_t *output_data);
/**
 * @brief Basic s4 depthwise convolution function that doesn't have any constraints on the input dimensions.
 *
 * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function
 * definition file to see if an additional buffer is required.
 * Optional function {API}_get_buffer_size() provides the buffer
 * size if an additional buffer is required.
 * The caller is expected to clear the buffer, if applicable, for security reasons.
 * @param[in] dw_conv_params Depthwise convolution parameters (e.g. strides, dilations, pads,...)
 * dw_conv_params->dilation is not used.
 * Range of dw_conv_params->input_offset : [-127, 128]
 * Range of dw_conv_params->output_offset : [-128, 127]
 * @param[in] quant_params Per-channel quantization info.
 * It contains the multiplier and shift values to be applied to each
 * output channel
 * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
 * Batch argument N is not used.
 * @param[in] input Input (activation) data pointer. Data type: int8
 * @param[in] filter_dims Filter tensor dimensions. Format: [1, H, W, C_OUT]
 * @param[in] kernel Filter data pointer. Data type: int8_t packed 4-bit weights, e.g four sequential
 * weights [0x1, 0x2, 0x3, 0x4] packed as [0x21, 0x43].
 * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT]
 * @param[in] bias Bias data pointer. Data type: int32
 * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT]
 * @param[in, out] output Output data pointer. Data type: int8
 * @return The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
 *
 * @details
 * - Supported framework: TensorFlow Lite
 */
arm_cmsis_nn_status arm_depthwise_conv_s4(const cmsis_nn_context *ctx,
                                          const cmsis_nn_dw_conv_params *dw_conv_params,
                                          const cmsis_nn_per_channel_quant_params *quant_params,
                                          const cmsis_nn_dims *input_dims,
                                          const int8_t *input,
                                          const cmsis_nn_dims *filter_dims,
                                          const int8_t *kernel,
                                          const cmsis_nn_dims *bias_dims,
                                          const int32_t *bias,
                                          const cmsis_nn_dims *output_dims,
                                          int8_t *output);
/**
 * @brief Basic s16 depthwise convolution function that doesn't have any constraints on the input dimensions.
 *
 * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function
 * definition file to see if an additional buffer is required.
 * Optional function {API}_get_buffer_size() provides the buffer
 * size if an additional buffer is required.
 * The caller is expected to clear the buffer, if applicable, for security reasons.
 * @param[in] dw_conv_params Depthwise convolution parameters (e.g. strides, dilations, pads,...)
 * conv_params->input_offset : Not used
 * conv_params->output_offset : Not used
 * @param[in] quant_params Per-channel quantization info.
 * It contains the multiplier and shift values to be applied to each
 * output channel
 * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
 * Batch argument N is not used.
 * @param[in] input_data Input (activation) data pointer. Data type: int16
 * @param[in] filter_dims Filter tensor dimensions. Format: [1, H, W, C_OUT]
 * @param[in] filter_data Filter data pointer. Data type: int8
 * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT]
 * @param[in] bias_data Bias data pointer. Data type: int64
 * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT]
 * @param[in, out] output_data Output data pointer. Data type: int16
 * @return The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
 *
 * @details
 * - Supported framework: TensorFlow Lite
 */
arm_cmsis_nn_status arm_depthwise_conv_s16(const cmsis_nn_context *ctx,
                                           const cmsis_nn_dw_conv_params *dw_conv_params,
                                           const cmsis_nn_per_channel_quant_params *quant_params,
                                           const cmsis_nn_dims *input_dims,
                                           const int16_t *input_data,
                                           const cmsis_nn_dims *filter_dims,
                                           const int8_t *filter_data,
                                           const cmsis_nn_dims *bias_dims,
                                           const int64_t *bias_data,
                                           const cmsis_nn_dims *output_dims,
                                           int16_t *output_data);
/**
 * @brief Wrapper function to pick the right optimized s16 depthwise convolution function
 *
 * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function
 * definition file to see if an additional buffer is required.
 * Optional function {API}_get_buffer_size() provides the buffer
 * size if required.
 * The caller is expected to clear the buffer, if applicable, for security reasons.
 * @param[in] dw_conv_params Depthwise convolution parameters (e.g. strides, dilations, pads,...)
 * dw_conv_params->dilation is not used.
 * Range of dw_conv_params->input_offset : Not used
 * Range of dw_conv_params->output_offset : Not used
 * @param[in] quant_params Per-channel quantization info.
 * It contains the multiplier and shift values to be applied to each
 * output channel
 * @param[in] input_dims Input (activation) tensor dimensions. Format: [H, W, C_IN]
 * Batch argument N is not used and assumed to be 1.
 * @param[in] input_data Input (activation) data pointer. Data type: int16
 * @param[in] filter_dims Filter tensor dimensions. Format: [1, H, W, C_OUT]
 * @param[in] filter_data Filter data pointer. Data type: int8
 * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT]
 * @param[in] bias_data Bias data pointer. Data type: int64
 * @param[in] output_dims Output tensor dimensions. Format: [1, H, W, C_OUT]
 * @param[in, out] output_data Output data pointer. Data type: int16
 * @return The function returns
 * <code>ARM_CMSIS_NN_SUCCESS</code> - Successful completion.
 *
 * @details
 * - Supported framework: TensorFlow Lite
 * - Picks one of the following functions
 * -# arm_depthwise_conv_s16()
 * -# arm_depthwise_conv_fast_s16() - Cortex-M CPUs with DSP extension only
 */
arm_cmsis_nn_status arm_depthwise_conv_wrapper_s16(const cmsis_nn_context *ctx,
                                                   const cmsis_nn_dw_conv_params *dw_conv_params,
                                                   const cmsis_nn_per_channel_quant_params *quant_params,
                                                   const cmsis_nn_dims *input_dims,
                                                   const int16_t *input_data,
                                                   const cmsis_nn_dims *filter_dims,
                                                   const int8_t *filter_data,
                                                   const cmsis_nn_dims *bias_dims,
                                                   const int64_t *bias_data,
                                                   const cmsis_nn_dims *output_dims,
                                                   int16_t *output_data);
/**
 * @brief Get size of additional buffer required by arm_depthwise_conv_wrapper_s16()
 *
 * @param[in] dw_conv_params Depthwise convolution parameters (e.g. strides, dilations, pads,...)
 * Range of dw_conv_params->input_offset : Not used
 * Range of dw_conv_params->output_offset : Not used
 * @param[in] input_dims Input (activation) tensor dimensions. Format: [H, W, C_IN]
 * Batch argument N is not used and assumed to be 1.
 * @param[in] filter_dims Filter tensor dimensions. Format: [1, H, W, C_OUT]
 * @param[in] output_dims Output tensor dimensions. Format: [1, H, W, C_OUT]
 * @return Size of additional memory required for optimizations in bytes.
 *
 */
int32_t arm_depthwise_conv_wrapper_s16_get_buffer_size(const cmsis_nn_dw_conv_params *dw_conv_params,
                                                       const cmsis_nn_dims *input_dims,
                                                       const cmsis_nn_dims *filter_dims,
                                                       const cmsis_nn_dims *output_dims);
/**
 * @brief Get size of additional buffer required by arm_depthwise_conv_wrapper_s16() for processors with DSP extension.
 * Refer to arm_depthwise_conv_wrapper_s16_get_buffer_size() for function argument details.
 *
 * @note Intended for compilation on Host. If compiling for an Arm target, use
 * arm_depthwise_conv_wrapper_s16_get_buffer_size().
 *
 * @return Size of additional memory required for optimizations in bytes.
 *
 */
int32_t arm_depthwise_conv_wrapper_s16_get_buffer_size_dsp(const cmsis_nn_dw_conv_params *dw_conv_params,
                                                           const cmsis_nn_dims *input_dims,
                                                           const cmsis_nn_dims *filter_dims,
                                                           const cmsis_nn_dims *output_dims);
/**
 * @brief Get size of additional buffer required by arm_depthwise_conv_wrapper_s16() for Arm(R) Helium Architecture
 * case. Refer to arm_depthwise_conv_wrapper_s16_get_buffer_size() for function argument details.
 *
 * @note Intended for compilation on Host. If compiling for an Arm target, use
 * arm_depthwise_conv_wrapper_s16_get_buffer_size().
 *
 * @return Size of additional memory required for optimizations in bytes.
 *
 */
int32_t arm_depthwise_conv_wrapper_s16_get_buffer_size_mve(const cmsis_nn_dw_conv_params *dw_conv_params,
                                                           const cmsis_nn_dims *input_dims,
                                                           const cmsis_nn_dims *filter_dims,
                                                           const cmsis_nn_dims *output_dims);
/**
 * @brief Optimized s16 depthwise convolution function with constraint that in_channel equals out_channel.
 * Refer to arm_depthwise_conv_s16() for function argument details.
 *
 * @return The function returns one of the following
 * <code>ARM_CMSIS_NN_ARG_ERROR</code> - ctx->buf == NULL and
 * arm_depthwise_conv_fast_s16_get_buffer_size() > 0 or
 * input channel != output channel or
 * ch_mult != 1
 *
 * <code>ARM_CMSIS_NN_SUCCESS</code> - Successful operation
 *
 * @details
 * - Supported framework: TensorFlow Lite
 * - The following constraints on the arguments apply
 * -# Number of input channel equals number of output channels or ch_mult equals 1
 * - Recommended when number of channels is 4 or greater.
 *
 */
arm_cmsis_nn_status arm_depthwise_conv_fast_s16(const cmsis_nn_context *ctx,
                                                const cmsis_nn_dw_conv_params *dw_conv_params,
                                                const cmsis_nn_per_channel_quant_params *quant_params,
                                                const cmsis_nn_dims *input_dims,
                                                const int16_t *input_data,
                                                const cmsis_nn_dims *filter_dims,
                                                const int8_t *filter_data,
                                                const cmsis_nn_dims *bias_dims,
                                                const int64_t *bias_data,
                                                const cmsis_nn_dims *output_dims,
                                                int16_t *output_data);
/**
 * @brief Get the required buffer size for optimized s16 depthwise convolution
 * function with constraint that in_channel equals out_channel.
 * See also arm_depthwise_conv_fast_s16().
 * @param[in] input_dims Input (activation) tensor dimensions. Format: [1, H, W, C_IN]
 * Batch argument N is not used.
 * @param[in] filter_dims Filter tensor dimensions. Format: [1, H, W, C_OUT]
 * @return The function returns required buffer size in bytes
 *
 */
int32_t arm_depthwise_conv_fast_s16_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims);
/**
 * @brief Optimized s8 depthwise convolution function for 3x3 kernel size with some constraints on
 * the input arguments(documented below). Refer to arm_depthwise_conv_s8() for function
 * argument details.
 *
 * @return The function returns one of the following
 * <code>ARM_CMSIS_NN_ARG_ERROR</code> - Unsupported dimension of tensors
 * - Unsupported pad size along the x axis
 * <code>ARM_CMSIS_NN_SUCCESS</code> - Successful operation
 *
 * @details
 * - Supported framework : TensorFlow Lite Micro
 * - The following constraints on the arguments apply
 * -# Number of input channel equals number of output channels
 * -# Filter height and width equals 3
 * -# Padding along x is either 0 or 1.
 *
 */
arm_cmsis_nn_status arm_depthwise_conv_3x3_s8(const cmsis_nn_context *ctx,
                                              const cmsis_nn_dw_conv_params *dw_conv_params,
                                              const cmsis_nn_per_channel_quant_params *quant_params,
                                              const cmsis_nn_dims *input_dims,
                                              const int8_t *input_data,
                                              const cmsis_nn_dims *filter_dims,
                                              const int8_t *filter_data,
                                              const cmsis_nn_dims *bias_dims,
                                              const int32_t *bias_data,
                                              const cmsis_nn_dims *output_dims,
                                              int8_t *output_data);
/**
 * @brief Optimized s8 depthwise convolution function with constraint that in_channel equals out_channel.
 * Refer to arm_depthwise_conv_s8() for function argument details.
 *
 * @return The function returns one of the following
 * <code>ARM_CMSIS_NN_ARG_ERROR</code> - input channel != output channel or
 * ch_mult != 1
 * <code>ARM_CMSIS_NN_SUCCESS</code> - Successful operation
 *
 * @note If number of channels is not a multiple of 4, up to 3 elements outside the boundary will be read out
 * for the following if MVE optimizations (Arm Helium Technology) are used.
 * - Output shift
 * - Output multiplier
 * - Output bias
 * - kernel
 * @details
 * - Supported framework: TensorFlow Lite
 * - The following constraints on the arguments apply
 * -# Number of input channel equals number of output channels or ch_mult equals 1
 * - Recommended when number of channels is 4 or greater.
 *
 */
arm_cmsis_nn_status arm_depthwise_conv_s8_opt(const cmsis_nn_context *ctx,
                                              const cmsis_nn_dw_conv_params *dw_conv_params,
                                              const cmsis_nn_per_channel_quant_params *quant_params,
                                              const cmsis_nn_dims *input_dims,
                                              const int8_t *input_data,
                                              const cmsis_nn_dims *filter_dims,
                                              const int8_t *filter_data,
                                              const cmsis_nn_dims *bias_dims,
                                              const int32_t *bias_data,
                                              const cmsis_nn_dims *output_dims,
                                              int8_t *output_data);
/**
 * @brief Optimized s4 depthwise convolution function with constraint that in_channel equals out_channel.
 * Refer to arm_depthwise_conv_s4() for function argument details.
 *
 * @return The function returns one of the following
 * <code>ARM_CMSIS_NN_ARG_ERROR</code> - input channel != output channel or
 * ch_mult != 1
 * <code>ARM_CMSIS_NN_SUCCESS</code> - Successful operation
 *
 * @note If number of channels is not a multiple of 4, up to 3 elements outside the boundary will be read out
 * for the following if MVE optimizations (Arm Helium Technology) are used.
 * - Output shift
 * - Output multiplier
 * - Output bias
 * - kernel
 * @details
 * - Supported framework: TensorFlow Lite
 * - The following constraints on the arguments apply
 * -# Number of input channel equals number of output channels or ch_mult equals 1
 * - Recommended when number of channels is 4 or greater.
 *
 */
arm_cmsis_nn_status arm_depthwise_conv_s4_opt(const cmsis_nn_context *ctx,
                                              const cmsis_nn_dw_conv_params *dw_conv_params,
                                              const cmsis_nn_per_channel_quant_params *quant_params,
                                              const cmsis_nn_dims *input_dims,
                                              const int8_t *input_data,
                                              const cmsis_nn_dims *filter_dims,
                                              const int8_t *filter_data,
                                              const cmsis_nn_dims *bias_dims,
                                              const int32_t *bias_data,
                                              const cmsis_nn_dims *output_dims,
                                              int8_t *output_data);
/**
 * @brief Get the required buffer size for optimized s8 depthwise convolution
 * function with constraint that in_channel equals out_channel.
 * See also arm_depthwise_conv_s8_opt().
 * @param[in] input_dims Input (activation) tensor dimensions. Format: [1, H, W, C_IN]
 * Batch argument N is not used.
 * @param[in] filter_dims Filter tensor dimensions. Format: [1, H, W, C_OUT]
 * @return The function returns required buffer size in bytes
 *
 */
int32_t arm_depthwise_conv_s8_opt_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims);
/**
 * @brief Get the required buffer size for optimized s4 depthwise convolution
 * function with constraint that in_channel equals out_channel.
 * See also arm_depthwise_conv_s4_opt().
 * @param[in] input_dims Input (activation) tensor dimensions. Format: [1, H, W, C_IN]
 * Batch argument N is not used.
 * @param[in] filter_dims Filter tensor dimensions. Format: [1, H, W, C_OUT]
 * @return The function returns required buffer size in bytes
 *
 */
int32_t arm_depthwise_conv_s4_opt_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims);
  1347. /**
  1348. * @defgroup FC Fully-connected Layer Functions
  1349. *
  1350. * Collection of fully-connected and matrix multiplication functions.
  1351. *
  1352. * Fully-connected layer is basically a matrix-vector multiplication
  1353. * with bias. The matrix is the weights and the input/output vectors
 * are the activation values. Supported {weight, activation} precisions
 * include {4-bit, 8-bit}, {8-bit, 8-bit} and {8-bit, 16-bit}
  1356. *
  1357. *
  1358. */
/**
 * @brief Basic s4 Fully Connected function.
 *
 * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function
 * definition file to see if an additional buffer is required.
 * Optional function {API}_get_buffer_size() provides the buffer
 * size if an additional buffer is required.
 * The caller is expected to clear the buffer, if applicable, for security reasons.
 * @param[in] fc_params Fully Connected layer parameters.
 * Range of fc_params->input_offset : [-127, 128]
 * fc_params->filter_offset : 0
 * Range of fc_params->output_offset : [-128, 127]
 * @param[in] quant_params Per-tensor quantization info.
 * It contains the multiplier and shift values to be applied to the output tensor.
 * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
 * Input dimension is taken as Nx(H * W * C_IN)
 * @param[in] input_data Input (activation) data pointer. Data type: int8
 * @param[in] filter_dims Two dimensional filter dimensions. Format: [N, C]
 * N : accumulation depth and equals (H * W * C_IN) from input_dims
 * C : output depth and equals C_OUT in output_dims
 * H & W : Not used
 * @param[in] filter_data Filter data pointer. Data type: int8_t packed 4-bit weights, e.g four sequential
 * weights [0x1, 0x2, 0x3, 0x4] packed as [0x21, 0x43].
 * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT]
 * N, H, W : Not used
 * @param[in] bias_data Bias data pointer. Data type: int32
 * @param[in] output_dims Output tensor dimensions. Format: [N, C_OUT]
 * N : Batches
 * C_OUT : Output depth
 * H & W : Not used.
 * @param[in, out] output_data Output data pointer. Data type: int8
 * @return The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
 *
 * @details
 * - Supported framework: TensorFlow Lite
 */
arm_cmsis_nn_status arm_fully_connected_s4(const cmsis_nn_context *ctx,
                                           const cmsis_nn_fc_params *fc_params,
                                           const cmsis_nn_per_tensor_quant_params *quant_params,
                                           const cmsis_nn_dims *input_dims,
                                           const int8_t *input_data,
                                           const cmsis_nn_dims *filter_dims,
                                           const int8_t *filter_data,
                                           const cmsis_nn_dims *bias_dims,
                                           const int32_t *bias_data,
                                           const cmsis_nn_dims *output_dims,
                                           int8_t *output_data);
/**
 * @brief Basic s8 Fully Connected function.
 *
 * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function
 * definition file to see if an additional buffer is required.
 * Optional function {API}_get_buffer_size() provides the buffer
 * size if an additional buffer is required.
 * The caller is expected to clear the buffer, if applicable, for security reasons.
 * @param[in] fc_params Fully Connected layer parameters.
 * Range of fc_params->input_offset : [-127, 128]
 * fc_params->filter_offset : 0
 * Range of fc_params->output_offset : [-128, 127]
 * @param[in] quant_params Per-tensor quantization info.
 * It contains the multiplier and shift values to be applied to the output tensor.
 * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
 * Input dimension is taken as Nx(H * W * C_IN)
 * @param[in] input_data Input (activation) data pointer. Data type: int8
 * @param[in] filter_dims Two dimensional filter dimensions. Format: [N, C]
 * N : accumulation depth and equals (H * W * C_IN) from input_dims
 * C : output depth and equals C_OUT in output_dims
 * H & W : Not used
 * @param[in] filter_data Filter data pointer. Data type: int8
 * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT]
 * N, H, W : Not used
 * @param[in] bias_data Bias data pointer. Data type: int32
 * @param[in] output_dims Output tensor dimensions. Format: [N, C_OUT]
 * N : Batches
 * C_OUT : Output depth
 * H & W : Not used.
 * @param[in, out] output_data Output data pointer. Data type: int8
 *
 * @return The function returns either
 * <code>ARM_CMSIS_NN_ARG_ERROR</code> if argument constraints fail, or
 * <code>ARM_CMSIS_NN_SUCCESS</code> on successful completion.
 *
 * @details
 * - Supported framework: TensorFlow Lite
 */
arm_cmsis_nn_status arm_fully_connected_s8(const cmsis_nn_context *ctx,
                                           const cmsis_nn_fc_params *fc_params,
                                           const cmsis_nn_per_tensor_quant_params *quant_params,
                                           const cmsis_nn_dims *input_dims,
                                           const int8_t *input_data,
                                           const cmsis_nn_dims *filter_dims,
                                           const int8_t *filter_data,
                                           const cmsis_nn_dims *bias_dims,
                                           const int32_t *bias_data,
                                           const cmsis_nn_dims *output_dims,
                                           int8_t *output_data);
/**
 * @brief Calculate the sum of each row in vector_data, multiply by lhs_offset and optionally add bias_data.
 * @param[in, out] vector_sum_buf Buffer for vector sums
 * @param[in] vector_cols Number of vector columns
 * @param[in] vector_rows Number of vector rows
 * @param[in] vector_data Vector of weights data
 * @param[in] lhs_offset Constant multiplied with each sum
 * @param[in] bias_data Vector of bias data, optionally added to each sum.
 * @return The function returns
 * <code>ARM_CMSIS_NN_SUCCESS</code> - Successful operation
 */
arm_cmsis_nn_status arm_vector_sum_s8(int32_t *vector_sum_buf,
                                      const int32_t vector_cols,
                                      const int32_t vector_rows,
                                      const int8_t *vector_data,
                                      const int32_t lhs_offset,
                                      const int32_t *bias_data);
/**
 * @brief Get size of additional buffer required by arm_fully_connected_s8().
 * See also arm_vector_sum_s8(), which is required if buffer size is > 0.
 * @param[in] filter_dims Dimensions of the filter tensor
 * @return The function returns required buffer size in bytes
 *
 */
int32_t arm_fully_connected_s8_get_buffer_size(const cmsis_nn_dims *filter_dims);
/**
 * @brief Get size of additional buffer required by arm_fully_connected_s8() for processors with DSP extension.
 * Refer to arm_fully_connected_s8_get_buffer_size() for function argument details.
 *
 * @note Intended for compilation on Host. If compiling for an Arm target, use
 * arm_fully_connected_s8_get_buffer_size().
 *
 * @return The function returns required buffer size in bytes
 *
 */
int32_t arm_fully_connected_s8_get_buffer_size_dsp(const cmsis_nn_dims *filter_dims);
/**
 * @brief Get size of additional buffer required by arm_fully_connected_s8() for Arm(R) Helium Architecture case.
 * Refer to arm_fully_connected_s8_get_buffer_size() for function argument details.
 *
 * @note Intended for compilation on Host. If compiling for an Arm target, use
 * arm_fully_connected_s8_get_buffer_size().
 *
 * @return The function returns required buffer size in bytes
 *
 */
int32_t arm_fully_connected_s8_get_buffer_size_mve(const cmsis_nn_dims *filter_dims);
/**
 * @brief Basic s16 Fully Connected function.
 *
 * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function
 * definition file to see if an additional buffer is required.
 * Optional function arm_fully_connected_s16_get_buffer_size() provides the buffer
 * size if an additional buffer is required.
 * The caller is expected to clear the buffer, if applicable, for security reasons.
 * @param[in] fc_params Fully Connected layer parameters.
 * fc_params->input_offset : 0
 * fc_params->filter_offset : 0
 * fc_params->output_offset : 0
 * @param[in] quant_params Per-tensor quantization info.
 * It contains the multiplier and shift values to be applied to the output tensor.
 * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
 * Input dimension is taken as Nx(H * W * C_IN)
 * @param[in] input_data Input (activation) data pointer. Data type: int16
 * @param[in] filter_dims Two dimensional filter dimensions. Format: [N, C]
 * N : accumulation depth and equals (H * W * C_IN) from input_dims
 * C : output depth and equals C_OUT in output_dims
 * H & W : Not used
 * @param[in] filter_data Filter data pointer. Data type: int8
 * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT]
 * N, H, W : Not used
 * @param[in] bias_data Bias data pointer. Data type: int64
 * @param[in] output_dims Output tensor dimensions. Format: [N, C_OUT]
 * N : Batches
 * C_OUT : Output depth
 * H & W : Not used.
 * @param[in, out] output_data Output data pointer. Data type: int16
 * @return The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
 *
 * @details
 * - Supported framework: TensorFlow Lite
 */
arm_cmsis_nn_status arm_fully_connected_s16(const cmsis_nn_context *ctx,
                                            const cmsis_nn_fc_params *fc_params,
                                            const cmsis_nn_per_tensor_quant_params *quant_params,
                                            const cmsis_nn_dims *input_dims,
                                            const int16_t *input_data,
                                            const cmsis_nn_dims *filter_dims,
                                            const int8_t *filter_data,
                                            const cmsis_nn_dims *bias_dims,
                                            const int64_t *bias_data,
                                            const cmsis_nn_dims *output_dims,
                                            int16_t *output_data);
/**
 * @brief Get size of additional buffer required by arm_fully_connected_s16().
 * @param[in] filter_dims Dimensions of the filter tensor
 * @return The function returns required buffer size in bytes
 *
 */
int32_t arm_fully_connected_s16_get_buffer_size(const cmsis_nn_dims *filter_dims);
/**
 * @brief Get size of additional buffer required by arm_fully_connected_s16() for processors with DSP extension.
 * Refer to arm_fully_connected_s16_get_buffer_size() for function argument details.
 *
 * @note Intended for compilation on Host. If compiling for an Arm target, use
 * arm_fully_connected_s16_get_buffer_size().
 *
 * @return The function returns required buffer size in bytes
 *
 */
int32_t arm_fully_connected_s16_get_buffer_size_dsp(const cmsis_nn_dims *filter_dims);
/**
 * @brief Get size of additional buffer required by arm_fully_connected_s16() for Arm(R) Helium Architecture case.
 * Refer to arm_fully_connected_s16_get_buffer_size() for function argument details.
 *
 * @note Intended for compilation on Host. If compiling for an Arm target, use
 * arm_fully_connected_s16_get_buffer_size().
 *
 * @return The function returns required buffer size in bytes
 *
 */
int32_t arm_fully_connected_s16_get_buffer_size_mve(const cmsis_nn_dims *filter_dims);
  1569. /**
  1570. * @defgroup groupElementwise Elementwise Functions
  1571. *
  1572. * Elementwise add and multiplication functions.
  1573. *
  1574. */
/**
 * @brief s8 elementwise add of two vectors
 * @param[in] input_1_vect pointer to input vector 1
 * @param[in] input_2_vect pointer to input vector 2
 * @param[in] input_1_offset offset for input 1. Range: -127 to 128
 * @param[in] input_1_mult multiplier for input 1
 * @param[in] input_1_shift shift for input 1
 * @param[in] input_2_offset offset for input 2. Range: -127 to 128
 * @param[in] input_2_mult multiplier for input 2
 * @param[in] input_2_shift shift for input 2
 * @param[in] left_shift input left shift
 * @param[in,out] output pointer to output vector
 * @param[in] out_offset output offset. Range: -128 to 127
 * @param[in] out_mult output multiplier
 * @param[in] out_shift output shift
 * @param[in] out_activation_min minimum value to clamp output to. Min: -128
 * @param[in] out_activation_max maximum value to clamp output to. Max: 127
 * @param[in] block_size number of samples
 * @return The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
 */
arm_cmsis_nn_status arm_elementwise_add_s8(const int8_t *input_1_vect,
                                           const int8_t *input_2_vect,
                                           const int32_t input_1_offset,
                                           const int32_t input_1_mult,
                                           const int32_t input_1_shift,
                                           const int32_t input_2_offset,
                                           const int32_t input_2_mult,
                                           const int32_t input_2_shift,
                                           const int32_t left_shift,
                                           int8_t *output,
                                           const int32_t out_offset,
                                           const int32_t out_mult,
                                           const int32_t out_shift,
                                           const int32_t out_activation_min,
                                           const int32_t out_activation_max,
                                           const int32_t block_size);
/**
 * @brief s16 elementwise add of two vectors
 * @param[in] input_1_vect pointer to input vector 1
 * @param[in] input_2_vect pointer to input vector 2
 * @param[in] input_1_offset offset for input 1. Not used.
 * @param[in] input_1_mult multiplier for input 1
 * @param[in] input_1_shift shift for input 1
 * @param[in] input_2_offset offset for input 2. Not used.
 * @param[in] input_2_mult multiplier for input 2
 * @param[in] input_2_shift shift for input 2
 * @param[in] left_shift input left shift
 * @param[in,out] output pointer to output vector
 * @param[in] out_offset output offset. Not used.
 * @param[in] out_mult output multiplier
 * @param[in] out_shift output shift
 * @param[in] out_activation_min minimum value to clamp output to. Min: -32768
 * @param[in] out_activation_max maximum value to clamp output to. Max: 32767
 * @param[in] block_size number of samples
 * @return The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
 */
arm_cmsis_nn_status arm_elementwise_add_s16(const int16_t *input_1_vect,
                                            const int16_t *input_2_vect,
                                            const int32_t input_1_offset,
                                            const int32_t input_1_mult,
                                            const int32_t input_1_shift,
                                            const int32_t input_2_offset,
                                            const int32_t input_2_mult,
                                            const int32_t input_2_shift,
                                            const int32_t left_shift,
                                            int16_t *output,
                                            const int32_t out_offset,
                                            const int32_t out_mult,
                                            const int32_t out_shift,
                                            const int32_t out_activation_min,
                                            const int32_t out_activation_max,
                                            const int32_t block_size);
  1647. /**
  1648. * @brief s8 elementwise multiplication
  1649. * @param[in] input_1_vect pointer to input vector 1
  1650. * @param[in] input_2_vect pointer to input vector 2
  1651. * @param[in] input_1_offset offset for input 1. Range: -127 to 128
  1652. * @param[in] input_2_offset offset for input 2. Range: -127 to 128
  1653. * @param[in,out] output pointer to output vector
  1654. * @param[in] out_offset output offset. Range: -128 to 127
  1655. * @param[in] out_mult output multiplier
  1656. * @param[in] out_shift output shift
  1657. * @param[in] out_activation_min minimum value to clamp output to. Min: -128
  1658. * @param[in] out_activation_max maximum value to clamp output to. Max: 127
  1659. * @param[in] block_size number of samples
  1660. * @return The function returns ARM_CMSIS_NN_SUCCESS
  1661. *
  1662. * @details Supported framework: TensorFlow Lite micro
  1663. */
  1664. arm_cmsis_nn_status arm_elementwise_mul_s8(const int8_t *input_1_vect,
  1665. const int8_t *input_2_vect,
  1666. const int32_t input_1_offset,
  1667. const int32_t input_2_offset,
  1668. int8_t *output,
  1669. const int32_t out_offset,
  1670. const int32_t out_mult,
  1671. const int32_t out_shift,
  1672. const int32_t out_activation_min,
  1673. const int32_t out_activation_max,
  1674. const int32_t block_size);
  1675. /**
  1676. * @brief s16 elementwise multiplication
  1677. * @param[in] input_1_vect pointer to input vector 1
  1678. * @param[in] input_2_vect pointer to input vector 2
  1679. * @param[in] input_1_offset offset for input 1. Not used.
  1680. * @param[in] input_2_offset offset for input 2. Not used.
  1681. * @param[in,out] output pointer to output vector
  1682. * @param[in] out_offset output offset. Not used.
  1683. * @param[in] out_mult output multiplier
  1684. * @param[in] out_shift output shift
  1685. * @param[in] out_activation_min minimum value to clamp output to. Min: -32768
  1686. * @param[in] out_activation_max maximum value to clamp output to. Max: 32767
  1687. * @param[in] block_size number of samples
  1688. * @return The function returns ARM_CMSIS_NN_SUCCESS
  1689. *
  1690. * @details Supported framework: TensorFlow Lite micro
  1691. */
  1692. arm_cmsis_nn_status arm_elementwise_mul_s16(const int16_t *input_1_vect,
  1693. const int16_t *input_2_vect,
  1694. const int32_t input_1_offset,
  1695. const int32_t input_2_offset,
  1696. int16_t *output,
  1697. const int32_t out_offset,
  1698. const int32_t out_mult,
  1699. const int32_t out_shift,
  1700. const int32_t out_activation_min,
  1701. const int32_t out_activation_max,
  1702. const int32_t block_size);
  1703. /**
  1704. * @defgroup Acti Activation Functions
  1705. *
  1706. * Perform activation layers, including ReLU (Rectified Linear Unit),
  1707. * sigmoid and tanh
  1708. *
  1709. */
  1710. /**
  1711. * @brief Q7 RELU function
  1712. * @param[in,out] data pointer to input
  1713. * @param[in] size number of elements
  1714. */
  1715. void arm_relu_q7(int8_t *data, uint16_t size);
  1716. /**
  1717. * @brief s8 ReLU6 function
  1718. * @param[in,out] data pointer to input
  1719. * @param[in] size number of elements
  1720. */
  1721. void arm_relu6_s8(int8_t *data, uint16_t size);
  1722. /**
  1723. * @brief Q15 RELU function
  1724. * @param[in,out] data pointer to input
  1725. * @param[in] size number of elements
  1726. */
  1727. void arm_relu_q15(int16_t *data, uint16_t size);
  1728. /**
  1729. * @brief s16 neural network activation function using direct table look-up
  1730. * @param[in] input pointer to input data
  1731. * @param[out] output pointer to output
  1732. * @param[in] size number of elements
  1733. * @param[in] left_shift bit-width of the integer part, assumed to be smaller than 3.
  1734. * @param[in] type type of activation functions
  1735. * @return The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
  1736. *
  1737. * @details Supported framework: TensorFlow Lite for Microcontrollers.
  1738. * This activation function must be bit precise congruent with the corresponding TFLM tanh and sigmoid activation
  1739. * functions
  1740. */
  1741. arm_cmsis_nn_status arm_nn_activation_s16(const int16_t *input,
  1742. int16_t *output,
  1743. const int32_t size,
  1744. const int32_t left_shift,
  1745. const arm_nn_activation_type type);
  1746. /**
  1747. * @defgroup Pooling Pooling Functions
  1748. *
  1749. * Perform max and average pooling operations
  1750. *
  1751. */
  1752. /**
  1753. * @brief s8 average pooling function.
  1754. *
  1755. * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function
  1756. * definition file to see if an additional buffer is required.
  1757. * Optional function {API}_get_buffer_size() provides the buffer
  1758. * size if an additional buffer is required.
  1759. * The caller is expected to clear the buffer, if applicable, for security reasons.
  1760. * @param[in] pool_params Pooling parameters
  1761. * @param[in] input_dims Input (activation) tensor dimensions. Format: [H, W, C_IN]
  1762. * @param[in] input_data Input (activation) data pointer. Data type: int8
  1763. * @param[in] filter_dims Filter tensor dimensions. Format: [H, W]
  1764. * Argument N and C are not used.
  1765. * @param[in] output_dims Output tensor dimensions. Format: [H, W, C_OUT]
  1766. * Argument N is not used.
  1767. * C_OUT equals C_IN.
  1768. * @param[in, out] output_data Output data pointer. Data type: int8
  1769. *
  1770. * @return The function returns either
  1771. * <code>ARM_CMSIS_NN_ARG_ERROR</code> if argument constraints fail. or,
  1772. * <code>ARM_CMSIS_NN_SUCCESS</code> on successful completion.
  1773. *
  1774. * @details
  1775. * - Supported Framework: TensorFlow Lite
  1776. *
  1777. */
  1778. arm_cmsis_nn_status arm_avgpool_s8(const cmsis_nn_context *ctx,
  1779. const cmsis_nn_pool_params *pool_params,
  1780. const cmsis_nn_dims *input_dims,
  1781. const int8_t *input_data,
  1782. const cmsis_nn_dims *filter_dims,
  1783. const cmsis_nn_dims *output_dims,
  1784. int8_t *output_data);
  1785. /**
  1786. * @brief Get the required buffer size for S8 average pooling function
  1787. * @param[in] dim_dst_width output tensor dimension
  1788. * @param[in] ch_src number of input tensor channels
  1789. * @return The function returns required buffer size in bytes
  1790. *
  1791. */
  1792. int32_t arm_avgpool_s8_get_buffer_size(const int dim_dst_width, const int ch_src);
  1793. /**
  1794. * @brief Get the required buffer size for S8 average pooling function for processors with DSP extension.
  1795. * Refer to arm_avgpool_s8_get_buffer_size() for function argument details.
  1796. *
  1797. * @note Intended for compilation on Host. If compiling for an Arm target, use
  1798. * arm_avgpool_s8_get_buffer_size().
  1799. *
  1800. */
  1801. int32_t arm_avgpool_s8_get_buffer_size_dsp(const int dim_dst_width, const int ch_src);
  1802. /**
  1803. * @brief Get the required buffer size for S8 average pooling function for Arm(R) Helium Architecture case.
  1804. * Refer to arm_avgpool_s8_get_buffer_size() for function argument details.
  1805. *
  1806. * @note Intended for compilation on Host. If compiling for an Arm target, use
  1807. * arm_avgpool_s8_get_buffer_size().
  1808. *
  1809. */
  1810. int32_t arm_avgpool_s8_get_buffer_size_mve(const int dim_dst_width, const int ch_src);
  1811. /**
  1812. * @brief s16 average pooling function.
  1813. *
  1814. * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function
  1815. * definition file to see if an additional buffer is required.
  1816. * Optional function {API}_get_buffer_size() provides the buffer
  1817. * size if an additional buffer is required.
  1818. * The caller is expected to clear the buffer, if applicable, for security reasons.
  1819. * @param[in] pool_params Pooling parameters
  1820. * @param[in] input_dims Input (activation) tensor dimensions. Format: [H, W, C_IN]
  1821. * @param[in] input_data Input (activation) data pointer. Data type: int16
  1822. * @param[in] filter_dims Filter tensor dimensions. Format: [H, W]
  1823. * Argument N and C are not used.
  1824. * @param[in] output_dims Output tensor dimensions. Format: [H, W, C_OUT]
  1825. * Argument N is not used.
  1826. * C_OUT equals C_IN.
  1827. * @param[in, out] output_data Output data pointer. Data type: int16
  1828. *
  1829. * @return The function returns
  1830. * <code>ARM_CMSIS_NN_SUCCESS</code> - Successful operation
  1831. * <code>ARM_CMSIS_NN_ARG_ERROR</code> - In case of invalid arguments
  1832. *
  1833. * @details
  1834. * - Supported Framework: TensorFlow Lite
  1835. *
  1836. */
  1837. arm_cmsis_nn_status arm_avgpool_s16(const cmsis_nn_context *ctx,
  1838. const cmsis_nn_pool_params *pool_params,
  1839. const cmsis_nn_dims *input_dims,
  1840. const int16_t *input_data,
  1841. const cmsis_nn_dims *filter_dims,
  1842. const cmsis_nn_dims *output_dims,
  1843. int16_t *output_data);
  1844. /**
  1845. * @brief Get the required buffer size for S16 average pooling function
  1846. * @param[in] dim_dst_width output tensor dimension
  1847. * @param[in] ch_src number of input tensor channels
  1848. * @return The function returns required buffer size in bytes
  1849. *
  1850. */
  1851. int32_t arm_avgpool_s16_get_buffer_size(const int dim_dst_width, const int ch_src);
  1852. /**
  1853. * @brief Get the required buffer size for S16 average pooling function for processors with DSP extension.
  1854. * Refer to arm_avgpool_s16_get_buffer_size() for function argument details.
  1855. *
  1856. * @note Intended for compilation on Host. If compiling for an Arm target, use
  1857. * arm_avgpool_s16_get_buffer_size().
  1858. *
  1859. */
  1860. int32_t arm_avgpool_s16_get_buffer_size_dsp(const int dim_dst_width, const int ch_src);
  1861. /**
  1862. * @brief Get the required buffer size for S16 average pooling function for Arm(R) Helium Architecture case.
  1863. * Refer to arm_avgpool_s16_get_buffer_size() for function argument details.
  1864. *
  1865. * @note Intended for compilation on Host. If compiling for an Arm target, use
  1866. * arm_avgpool_s16_get_buffer_size().
  1867. *
  1868. */
  1869. int32_t arm_avgpool_s16_get_buffer_size_mve(const int dim_dst_width, const int ch_src);
  1870. /**
  1871. * @brief s8 max pooling function.
  1872. *
  1873. * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function
  1874. * definition file to see if an additional buffer is required.
  1875. * Optional function {API}_get_buffer_size() provides the buffer
  1876. * size if an additional buffer is required.
  1877. * The caller is expected to clear the buffer, if applicable, for security reasons.
  1878. * @param[in] pool_params Pooling parameters
  1879. * @param[in] input_dims Input (activation) tensor dimensions. Format: [H, W, C_IN]
  1880. * @param[in] input_data Input (activation) data pointer. The input tensor must not
  1881. * overlap with the output tensor. Data type: int8
  1882. * @param[in] filter_dims Filter tensor dimensions. Format: [H, W]
  1883. * Argument N and C are not used.
  1884. * @param[in] output_dims Output tensor dimensions. Format: [H, W, C_OUT]
  1885. * Argument N is not used.
  1886. * C_OUT equals C_IN.
  1887. * @param[in, out] output_data Output data pointer. Data type: int8
  1888. *
  1889. * @return The function returns either
  1890. * <code>ARM_CMSIS_NN_ARG_ERROR</code> if argument constraints fail. or,
  1891. * <code>ARM_CMSIS_NN_SUCCESS</code> on successful completion.
  1892. *
  1893. * @details
  1894. * - Supported Framework: TensorFlow Lite
  1895. *
  1896. */
  1897. arm_cmsis_nn_status arm_max_pool_s8(const cmsis_nn_context *ctx,
  1898. const cmsis_nn_pool_params *pool_params,
  1899. const cmsis_nn_dims *input_dims,
  1900. const int8_t *input_data,
  1901. const cmsis_nn_dims *filter_dims,
  1902. const cmsis_nn_dims *output_dims,
  1903. int8_t *output_data);
  1904. /**
  1905. * @brief s16 max pooling function.
  1906. *
  1907. * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function
  1908. * definition file to see if an additional buffer is required.
  1909. * Optional function {API}_get_buffer_size() provides the buffer
  1910. * size if an additional buffer is required.
  1911. * The caller is expected to clear the buffer, if applicable, for security reasons.
  1912. * @param[in] pool_params Pooling parameters
  1913. * @param[in] input_dims Input (activation) tensor dimensions. Format: [H, W, C_IN]
  1914. * @param[in] src Input (activation) data pointer. The input tensor must not
  1915. * overlap with the output tensor. Data type: int16
  1916. * @param[in] filter_dims Filter tensor dimensions. Format: [H, W]
  1917. * Argument N and C are not used.
  1918. * @param[in] output_dims Output tensor dimensions. Format: [H, W, C_OUT]
  1919. * Argument N is not used.
  1920. * C_OUT equals C_IN.
  1921. * @param[in, out] dst Output data pointer. Data type: int16
  1922. *
  1923. * @return The function returns either
  1924. * <code>ARM_CMSIS_NN_ARG_ERROR</code> if argument constraints fail. or,
  1925. * <code>ARM_CMSIS_NN_SUCCESS</code> on successful completion.
  1926. *
  1927. * @details
  1928. * - Supported Framework: TensorFlow Lite
  1929. *
  1930. */
  1931. arm_cmsis_nn_status arm_max_pool_s16(const cmsis_nn_context *ctx,
  1932. const cmsis_nn_pool_params *pool_params,
  1933. const cmsis_nn_dims *input_dims,
  1934. const int16_t *src,
  1935. const cmsis_nn_dims *filter_dims,
  1936. const cmsis_nn_dims *output_dims,
  1937. int16_t *dst);
  1938. /**
  1939. * @defgroup Softmax Softmax Functions
  1940. *
  1941. *
  1942. */
  1943. /**
  1944. * @brief S8 softmax function
  1945. * @param[in] input Pointer to the input tensor
  1946. * @param[in] num_rows Number of rows in the input tensor
  1947. * @param[in] row_size Number of elements in each input row
  1948. * @param[in] mult Input quantization multiplier
  1949. * @param[in] shift Input quantization shift within the range [0, 31]
  1950. * @param[in] diff_min Minimum difference with max in row. Used to check if
  1951. * the quantized exponential operation can be performed
  1952. * @param[out] output Pointer to the output tensor
  1953. *
  1954. * @note Supported framework: TensorFlow Lite micro (bit-accurate)
  1955. *
  1956. */
  1957. void arm_softmax_s8(const int8_t *input,
  1958. const int32_t num_rows,
  1959. const int32_t row_size,
  1960. const int32_t mult,
  1961. const int32_t shift,
  1962. const int32_t diff_min,
  1963. int8_t *output);
  1964. /**
  1965. * @brief S8 to s16 softmax function
  1966. * @param[in] input Pointer to the input tensor
  1967. * @param[in] num_rows Number of rows in the input tensor
  1968. * @param[in] row_size Number of elements in each input row
  1969. * @param[in] mult Input quantization multiplier
  1970. * @param[in] shift Input quantization shift within the range [0, 31]
  1971. * @param[in] diff_min Minimum difference with max in row. Used to check if
  1972. * the quantized exponential operation can be performed
  1973. * @param[out] output Pointer to the output tensor
  1974. *
  1975. * @note Supported framework: TensorFlow Lite micro (bit-accurate)
  1976. *
  1977. */
  1978. void arm_softmax_s8_s16(const int8_t *input,
  1979. const int32_t num_rows,
  1980. const int32_t row_size,
  1981. const int32_t mult,
  1982. const int32_t shift,
  1983. const int32_t diff_min,
  1984. int16_t *output);
  1985. /**
  1986. * @brief S16 softmax function
  1987. * @param[in] input Pointer to the input tensor
  1988. * @param[in] num_rows Number of rows in the input tensor
  1989. * @param[in] row_size Number of elements in each input row
  1990. * @param[in] mult Input quantization multiplier
  1991. * @param[in] shift Input quantization shift within the range [0, 31]
  1992. * @param[in] softmax_params Softmax s16 layer parameters with two pointers to LUTs speficied below.
  1993. * For indexing the high 9 bits are used and 7 remaining for interpolation.
  1994. * That means 512 entries for the 9-bit indexing and 1 extra for interpolation, i.e. 513
  1995. * values for each LUT.
  1996. * - Lookup table for exp(x), where x uniform distributed between [-10.0 , 0.0]
  1997. * - Lookup table for 1 / (1 + x), where x uniform distributed between [0.0 , 1.0]
  1998. * @param[out] output Pointer to the output tensor
  1999. * @return The function returns
  2000. * <code>ARM_CMSIS_NN_ARG_ERROR</code> Argument error check failed
  2001. * <code>ARM_CMSIS_NN_SUCCESS</code> - Successful operation
  2002. *
  2003. * @note Supported framework: TensorFlow Lite micro (bit-accurate)
  2004. *
  2005. */
  2006. arm_cmsis_nn_status arm_softmax_s16(const int16_t *input,
  2007. const int32_t num_rows,
  2008. const int32_t row_size,
  2009. const int32_t mult,
  2010. const int32_t shift,
  2011. const cmsis_nn_softmax_lut_s16 *softmax_params,
  2012. int16_t *output);
  2013. /**
  2014. * @brief U8 softmax function
  2015. * @param[in] input Pointer to the input tensor
  2016. * @param[in] num_rows Number of rows in the input tensor
  2017. * @param[in] row_size Number of elements in each input row
  2018. * @param[in] mult Input quantization multiplier
  2019. * @param[in] shift Input quantization shift within the range [0, 31]
  2020. * @param[in] diff_min Minimum difference with max in row. Used to check if
  2021. * the quantized exponential operation can be performed
  2022. * @param[out] output Pointer to the output tensor
  2023. *
  2024. * @note Supported framework: TensorFlow Lite micro (bit-accurate)
  2025. *
  2026. */
  2027. void arm_softmax_u8(const uint8_t *input,
  2028. const int32_t num_rows,
  2029. const int32_t row_size,
  2030. const int32_t mult,
  2031. const int32_t shift,
  2032. const int32_t diff_min,
  2033. uint8_t *output);
  2034. /**
  2035. * @defgroup Reshape Reshape Functions
  2036. *
  2037. */
  2038. /**
  2039. * @brief Reshape a s8 vector into another with different shape
  2040. * @param[in] input points to the s8 input vector
  2041. * @param[out] output points to the s8 output vector
  2042. * @param[in] total_size total size of the input and output vectors in bytes
  2043. *
  2044. * @note The output is expected to be in a memory area that does not overlap with the input's
  2045. *
  2046. */
  2047. void arm_reshape_s8(const int8_t *input, int8_t *output, const uint32_t total_size);
  2048. /**
  2049. * @defgroup Concatenation Concatenation Functions
  2050. *
  2051. */
  2052. /**
  2053. * @brief int8/uint8 concatenation function to be used for concatenating N-tensors along the X axis
  2054. * This function should be called for each input tensor to concatenate. The argument offset_x
  2055. * will be used to store the input tensor in the correct position in the output tensor
  2056. *
  2057. * i.e. offset_x = 0
  2058. * for(i = 0 i < num_input_tensors; ++i)
  2059. * {
  2060. * arm_concatenation_s8_x(&input[i], ..., &output, ..., ..., offset_x)
  2061. * offset_x += input_x[i]
  2062. * }
  2063. *
  2064. * This function assumes that the output tensor has:
  2065. * -# The same height of the input tensor
  2066. * -# The same number of channels of the input tensor
  2067. * -# The same batch size of the input tensor
  2068. *
  2069. * Unless specified otherwise, arguments are mandatory.
  2070. *
  2071. * @note This function, data layout independent, can be used to concatenate either int8 or uint8 tensors because it
  2072. * does not involve any arithmetic operation
  2073. *
  2074. * @param[in] input Pointer to input tensor. Input tensor must not overlap with the output tensor.
  2075. * @param[in] input_x Width of input tensor
  2076. * @param[in] input_y Height of input tensor
  2077. * @param[in] input_z Channels in input tensor
  2078. * @param[in] input_w Batch size in input tensor
  2079. * @param[out] output Pointer to output tensor. Expected to be at least
  2080. * (input_x * input_y * input_z * input_w) + offset_x
  2081. * bytes.
  2082. * @param[in] output_x Width of output tensor
  2083. * @param[in] offset_x The offset (in number of elements) on the X axis to start concatenating the input tensor
  2084. * It is user responsibility to provide the correct value
  2085. *
  2086. * <b> Input constraints</b>
  2087. * offset_x is less than output_x
  2088. *
  2089. */
  2090. void arm_concatenation_s8_x(const int8_t *input,
  2091. const uint16_t input_x,
  2092. const uint16_t input_y,
  2093. const uint16_t input_z,
  2094. const uint16_t input_w,
  2095. int8_t *output,
  2096. const uint16_t output_x,
  2097. const uint32_t offset_x);
  2098. /**
  2099. * @brief int8/uint8 concatenation function to be used for concatenating N-tensors along the Y axis
  2100. * This function should be called for each input tensor to concatenate. The argument offset_y
  2101. * will be used to store the input tensor in the correct position in the output tensor
  2102. *
  2103. * i.e. offset_y = 0
  2104. * for(i = 0 i < num_input_tensors; ++i)
  2105. * {
  2106. * arm_concatenation_s8_y(&input[i], ..., &output, ..., ..., offset_y)
  2107. * offset_y += input_y[i]
  2108. * }
  2109. *
  2110. * This function assumes that the output tensor has:
  2111. * -# The same width of the input tensor
  2112. * -# The same number of channels of the input tensor
  2113. * -# The same batch size of the input tensor
  2114. *
  2115. * Unless specified otherwise, arguments are mandatory.
  2116. *
  2117. * @note This function, data layout independent, can be used to concatenate either int8 or uint8 tensors because it
  2118. * does not involve any arithmetic operation
  2119. *
  2120. * @param[in] input Pointer to input tensor. Input tensor must not overlap with the output tensor.
  2121. * @param[in] input_x Width of input tensor
  2122. * @param[in] input_y Height of input tensor
  2123. * @param[in] input_z Channels in input tensor
  2124. * @param[in] input_w Batch size in input tensor
  2125. * @param[out] output Pointer to output tensor. Expected to be at least
  2126. * (input_z * input_w * input_x * input_y) + offset_y
  2127. * bytes.
  2128. * @param[in] output_y Height of output tensor
  2129. * @param[in] offset_y The offset on the Y axis to start concatenating the input tensor
  2130. * It is user responsibility to provide the correct value
  2131. *
  2132. * <b> Input constraints</b>
  2133. * offset_y is less than output_y
  2134. *
  2135. */
  2136. void arm_concatenation_s8_y(const int8_t *input,
  2137. const uint16_t input_x,
  2138. const uint16_t input_y,
  2139. const uint16_t input_z,
  2140. const uint16_t input_w,
  2141. int8_t *output,
  2142. const uint16_t output_y,
  2143. const uint32_t offset_y);
  2144. /**
  2145. * @brief int8/uint8 concatenation function to be used for concatenating N-tensors along the Z axis
  2146. * This function should be called for each input tensor to concatenate. The argument offset_z
  2147. * will be used to store the input tensor in the correct position in the output tensor
  2148. *
  2149. * i.e. offset_z = 0
  2150. * for(i = 0 i < num_input_tensors; ++i)
  2151. * {
  2152. * arm_concatenation_s8_z(&input[i], ..., &output, ..., ..., offset_z)
  2153. * offset_z += input_z[i]
  2154. * }
  2155. *
  2156. * This function assumes that the output tensor has:
  2157. * -# The same width of the input tensor
  2158. * -# The same height of the input tensor
  2159. * -# The same batch size of the input tensor
  2160. *
  2161. * Unless specified otherwise, arguments are mandatory.
  2162. *
  2163. * @note This function, data layout independent, can be used to concatenate either int8 or uint8 tensors because it
  2164. * does not involve any arithmetic operation
  2165. *
  2166. * @param[in] input Pointer to input tensor. Input tensor must not overlap with output tensor.
  2167. * @param[in] input_x Width of input tensor
  2168. * @param[in] input_y Height of input tensor
  2169. * @param[in] input_z Channels in input tensor
  2170. * @param[in] input_w Batch size in input tensor
  2171. * @param[out] output Pointer to output tensor. Expected to be at least
  2172. * (input_x * input_y * input_z * input_w) + offset_z
  2173. * bytes.
  2174. * @param[in] output_z Channels in output tensor
  2175. * @param[in] offset_z The offset on the Z axis to start concatenating the input tensor
  2176. * It is user responsibility to provide the correct value
  2177. *
  2178. * <b> Input constraints</b>
  2179. * offset_z is less than output_z
  2180. *
  2181. */
  2182. void arm_concatenation_s8_z(const int8_t *input,
  2183. const uint16_t input_x,
  2184. const uint16_t input_y,
  2185. const uint16_t input_z,
  2186. const uint16_t input_w,
  2187. int8_t *output,
  2188. const uint16_t output_z,
  2189. const uint32_t offset_z);
  2190. /**
  2191. * @brief int8/uint8 concatenation function to be used for concatenating N-tensors along the W axis (Batch size)
  2192. * This function should be called for each input tensor to concatenate. The argument offset_w
  2193. * will be used to store the input tensor in the correct position in the output tensor
  2194. *
  2195. * i.e. offset_w = 0
  2196. * for(i = 0 i < num_input_tensors; ++i)
  2197. * {
  2198. * arm_concatenation_s8_w(&input[i], ..., &output, ..., ..., offset_w)
  2199. * offset_w += input_w[i]
  2200. * }
  2201. *
  2202. * This function assumes that the output tensor has:
  2203. * -# The same width of the input tensor
  2204. * -# The same height of the input tensor
  2205. * -# The same number o channels of the input tensor
  2206. *
  2207. * Unless specified otherwise, arguments are mandatory.
  2208. *
  2209. * @note This function, data layout independent, can be used to concatenate either int8 or uint8 tensors because it
  2210. * does not involve any arithmetic operation
  2211. *
  2212. * @param[in] input Pointer to input tensor
  2213. * @param[in] input_x Width of input tensor
  2214. * @param[in] input_y Height of input tensor
  2215. * @param[in] input_z Channels in input tensor
  2216. * @param[in] input_w Batch size in input tensor
  2217. * @param[out] output Pointer to output tensor. Expected to be at least
  2218. * input_x * input_y * input_z * input_w
  2219. * bytes.
  2220. * @param[in] offset_w The offset on the W axis to start concatenating the input tensor
  2221. * It is user responsibility to provide the correct value
  2222. *
  2223. */
  2224. void arm_concatenation_s8_w(const int8_t *input,
  2225. const uint16_t input_x,
  2226. const uint16_t input_y,
  2227. const uint16_t input_z,
  2228. const uint16_t input_w,
  2229. int8_t *output,
  2230. const uint32_t offset_w);
  2231. /**
  2232. * @defgroup SVDF SVDF Functions
  2233. *
  2234. */
  2235. /**
  2236. * @brief s8 SVDF function with 8 bit state tensor and 8 bit time weights
  2237. *
  2238. * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function
  2239. * definition file to see if an additional buffer is required.
  2240. * Optional function arm_fully_connected_s8_get_buffer_size() provides the buffer
  2241. * size if an additional buffer is required.
  2242. * The caller is expected to clear the buffer, if applicable, for security reasons.
  2243. * @param[in] input_ctx Temporary scratch buffer
  2244. * The caller is expected to clear the buffer, if applicable, for security reasons.
  2245. * @param[in] output_ctx Temporary output scratch buffer
  2246. * The caller is expected to clear the buffer, if applicable, for security reasons.
  2247. * @param[in] svdf_params SVDF Parameters
  2248. * Range of svdf_params->input_offset : [-128, 127]
  2249. * Range of svdf_params->output_offset : [-128, 127]
  2250. * @param[in] input_quant_params Input quantization parameters
  2251. * @param[in] output_quant_params Output quantization parameters
  2252. * @param[in] input_dims Input tensor dimensions
  2253. * @param[in] input_data Pointer to input tensor
  2254. * @param[in] state_dims State tensor dimensions
  2255. * @param[in] state_data Pointer to state tensor
  2256. * @param[in] weights_feature_dims Weights (feature) tensor dimensions
  2257. * @param[in] weights_feature_data Pointer to the weights (feature) tensor
  2258. * @param[in] weights_time_dims Weights (time) tensor dimensions
  2259. * @param[in] weights_time_data Pointer to the weights (time) tensor
  2260. * @param[in] bias_dims Bias tensor dimensions
  2261. * @param[in] bias_data Pointer to bias tensor
  2262. * @param[in] output_dims Output tensor dimensions
  2263. * @param[out] output_data Pointer to the output tensor
  2264. *
  2265. * @return The function returns either
  2266. * <code>ARM_CMSIS_NN_ARG_ERROR</code> if argument constraints fail. or,
  2267. * <code>ARM_CMSIS_NN_SUCCESS</code> on successful completion.
  2268. *
  2269. * @details
  2270. * 1. Supported framework: TensorFlow Lite micro
  2271. */
  2272. arm_cmsis_nn_status arm_svdf_s8(const cmsis_nn_context *ctx,
  2273. const cmsis_nn_context *input_ctx,
  2274. const cmsis_nn_context *output_ctx,
  2275. const cmsis_nn_svdf_params *svdf_params,
  2276. const cmsis_nn_per_tensor_quant_params *input_quant_params,
  2277. const cmsis_nn_per_tensor_quant_params *output_quant_params,
  2278. const cmsis_nn_dims *input_dims,
  2279. const int8_t *input_data,
  2280. const cmsis_nn_dims *state_dims,
  2281. int8_t *state_data,
  2282. const cmsis_nn_dims *weights_feature_dims,
  2283. const int8_t *weights_feature_data,
  2284. const cmsis_nn_dims *weights_time_dims,
  2285. const int8_t *weights_time_data,
  2286. const cmsis_nn_dims *bias_dims,
  2287. const int32_t *bias_data,
  2288. const cmsis_nn_dims *output_dims,
  2289. int8_t *output_data);
/**
 * @brief s8 SVDF function with 16 bit state tensor and 16 bit time weights
 *        Variant of the s8 SVDF where the recurrent state and the time weights
 *        are held as int16_t instead of int8_t (see the pointer types below).
 *
 * @param[in]   input_ctx             Temporary scratch buffer
 *                                    The caller is expected to clear the buffer, if applicable, for security reasons.
 * @param[in]   output_ctx            Temporary output scratch buffer
 *                                    The caller is expected to clear the buffer, if applicable, for security reasons.
 * @param[in]   svdf_params           SVDF Parameters
 *                                    Range of svdf_params->input_offset  : [-128, 127]
 *                                    Range of svdf_params->output_offset : [-128, 127]
 * @param[in]   input_quant_params    Input quantization parameters
 * @param[in]   output_quant_params   Output quantization parameters
 * @param[in]   input_dims            Input tensor dimensions
 * @param[in]   input_data            Pointer to input tensor
 * @param[in]   state_dims            State tensor dimensions
 * @param[in]   state_data            Pointer to state tensor (int16_t; updated in place across invocations)
 * @param[in]   weights_feature_dims  Weights (feature) tensor dimensions
 * @param[in]   weights_feature_data  Pointer to the weights (feature) tensor
 * @param[in]   weights_time_dims     Weights (time) tensor dimensions
 * @param[in]   weights_time_data     Pointer to the weights (time) tensor (int16_t)
 * @param[in]   bias_dims             Bias tensor dimensions
 * @param[in]   bias_data             Pointer to bias tensor
 * @param[in]   output_dims           Output tensor dimensions
 * @param[out]  output_data           Pointer to the output tensor
 *
 * @return     The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
 *
 * @details
 *    1. Supported framework: TensorFlow Lite micro
 */
arm_cmsis_nn_status arm_svdf_state_s16_s8(const cmsis_nn_context *input_ctx,
                                          const cmsis_nn_context *output_ctx,
                                          const cmsis_nn_svdf_params *svdf_params,
                                          const cmsis_nn_per_tensor_quant_params *input_quant_params,
                                          const cmsis_nn_per_tensor_quant_params *output_quant_params,
                                          const cmsis_nn_dims *input_dims,
                                          const int8_t *input_data,
                                          const cmsis_nn_dims *state_dims,
                                          int16_t *state_data,
                                          const cmsis_nn_dims *weights_feature_dims,
                                          const int8_t *weights_feature_data,
                                          const cmsis_nn_dims *weights_time_dims,
                                          const int16_t *weights_time_data,
                                          const cmsis_nn_dims *bias_dims,
                                          const int32_t *bias_data,
                                          const cmsis_nn_dims *output_dims,
                                          int8_t *output_data);
  2337. /**
  2338. * @defgroup LSTM LSTM Layer Functions
  2339. *
  2340. */
/**
 * @brief LSTM unidirectional function with 8 bit input and output and 16 bit gate output.
 *        All operator configuration (dimensions, quantization, gate weights/biases) is
 *        carried in the params struct; no per-tensor arguments are passed separately.
 *
 * @param[in]   input    Pointer to input data (int8_t)
 * @param[out]  output   Pointer to output data (int8_t)
 * @param[in]   params   Struct containing all information about the lstm operator, see arm_nn_types.
 * @param[in]   buffers  Struct containing pointers to all temporary scratch buffers needed for the
 *                       lstm operator, see arm_nn_types.
 *
 *
 * @return     The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
 *
 * @details
 *    1. Supported framework: TensorFlow Lite Micro
 *
 */
arm_cmsis_nn_status arm_lstm_unidirectional_s8(const int8_t *input,
                                               int8_t *output,
                                               const cmsis_nn_lstm_params *params,
                                               cmsis_nn_lstm_context *buffers);
/**
 * @brief Get size of additional buffer required by arm_svdf_s8().
 *        Use the returned size to allocate the scratch buffers passed via the
 *        context arguments of arm_svdf_s8().
 * @param[in]   filter_dims   dimension of filter
 * @return      The function returns required buffer size in bytes
 *
 */
int32_t arm_svdf_s8_get_buffer_size(const cmsis_nn_dims *filter_dims);
/**
 * @brief Get size of additional buffer required by arm_svdf_s8() for processors with DSP extension.
 *        Refer to arm_svdf_s8_get_buffer_size() for function argument details.
 *
 * @return      The function returns required buffer size in bytes
 *
 * @note       Intended for compilation on Host. If compiling for an Arm target, use
 *             arm_svdf_s8_get_buffer_size(), which selects the correct variant automatically.
 *
 */
int32_t arm_svdf_s8_get_buffer_size_dsp(const cmsis_nn_dims *filter_dims);
/**
 * @brief Get size of additional buffer required by arm_svdf_s8() for Arm(R) Helium Architecture case.
 *        Refer to arm_svdf_s8_get_buffer_size() for function argument details.
 *
 * @return      The function returns required buffer size in bytes
 *
 * @note       Intended for compilation on Host. If compiling for an Arm target, use
 *             arm_svdf_s8_get_buffer_size(), which selects the correct variant automatically.
 *
 */
int32_t arm_svdf_s8_get_buffer_size_mve(const cmsis_nn_dims *filter_dims);
  2386. #ifdef __cplusplus
  2387. }
  2388. #endif
  2389. #endif /* ARM_NNFUNCTIONS_H */