/* kpu.h */
  /* Copyright 2018 Canaan Inc.
   *
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   *
   *     http://www.apache.org/licenses/LICENSE-2.0
   *
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  15. #ifndef _KPU_H
  16. #define _KPU_H
  #include <plic.h>
  #include <stddef.h>
  #include <stdint.h>
  #include "dmac.h"
  /* Give every declaration below C linkage when compiled as C++. */
  #ifdef __cplusplus
  extern "C" {
  #endif
  /* kpu_matmul_begin is a plain alias for kpu_conv2d_output (same arguments). */
  #define kpu_matmul_begin kpu_conv2d_output
  /* Address constant 0x40000000.
   * NOTE(review): presumably an I/O memory base or offset - confirm against its users. */
  #define IOMEM 0x40000000
  /* Callback type used throughout this header: receives an opaque context
   * pointer and returns an int. */
  typedef int (*plic_irq_callback_t)(void *ctx);
  /**
   * @brief Argument set for one KPU layer.
   *
   * Twelve consecutive 64-bit words. Each word is a union: `reg` gives the raw
   * value and `data` gives the same bits as named bit-fields (every `data`
   * struct adds up to exactly 64 bits). Member names are part of the public
   * interface and are kept verbatim, including the spelling `interrupt_enabe`.
   */
  typedef struct
  {
      /* Word 0: interrupt / mode flags. */
      union
      {
          uint64_t reg;
          struct
          {
              uint64_t int_en : 1;
              uint64_t ram_flag : 1;
              uint64_t full_add : 1;
              uint64_t depth_wise_layer : 1;
              uint64_t reserved : 60;
          } data;
      } interrupt_enabe;

      /* Word 1: source and destination image addresses (15 bits each). */
      union
      {
          uint64_t reg;
          struct
          {
              uint64_t image_src_addr : 15;
              uint64_t reserved0 : 17;
              uint64_t image_dst_addr : 15;
              uint64_t reserved1 : 17;
          } data;
      } image_addr;

      /* Word 2: input / output channel counts. */
      union
      {
          uint64_t reg;
          struct
          {
              uint64_t i_ch_num : 10;
              uint64_t reserved0 : 22;
              uint64_t o_ch_num : 10;
              uint64_t reserved1 : 6;
              uint64_t o_ch_num_coef : 10;
              uint64_t reserved2 : 6;
          } data;
      } image_channel_num;

      /* Word 3: input / output image dimensions. */
      union
      {
          uint64_t reg;
          struct
          {
              uint64_t i_row_wid : 10;
              uint64_t i_col_high : 9;
              uint64_t reserved0 : 13;
              uint64_t o_row_wid : 10;
              uint64_t o_col_high : 9;
              uint64_t reserved1 : 13;
          } data;
      } image_size;

      /* Word 4: kernel, padding and pooling configuration. */
      union
      {
          uint64_t reg;
          struct
          {
              uint64_t kernel_type : 3;
              uint64_t pad_type : 1;
              uint64_t pool_type : 4;
              uint64_t first_stride : 1;
              uint64_t bypass_conv : 1;
              uint64_t load_para : 1;
              uint64_t reserved0 : 5;
              uint64_t dma_burst_size : 8;
              uint64_t pad_value : 8;
              uint64_t bwsx_base_addr : 32;
          } data;
      } kernel_pool_type_cfg;

      /* Word 5: parameter load configuration. */
      union
      {
          uint64_t reg;
          struct
          {
              uint64_t load_coor : 1;
              uint64_t load_time : 6;
              uint64_t reserved0 : 8;
              uint64_t para_size : 17;
              uint64_t para_start_addr : 32;
          } data;
      } kernel_load_cfg;

      /* Word 6: coefficient column / row offsets. */
      union
      {
          uint64_t reg;
          struct
          {
              uint64_t coef_column_offset : 4;
              uint64_t coef_row_offset : 12;
              uint64_t reserved0 : 48;
          } data;
      } kernel_offset;

      /* Word 7: calculation configuration. */
      union
      {
          uint64_t reg;
          struct
          {
              uint64_t channel_switch_addr : 15;
              uint64_t reserved : 1;
              uint64_t row_switch_addr : 4;
              uint64_t coef_size : 8;
              uint64_t coef_group : 3;
              uint64_t load_act : 1;
              uint64_t active_addr : 32;
          } data;
      } kernel_calc_type_cfg;

      /* Word 8: write-back configuration. */
      union
      {
          uint64_t reg;
          struct
          {
              uint64_t wb_channel_switch_addr : 15;
              uint64_t reserved0 : 1;
              uint64_t wb_row_switch_addr : 4;
              uint64_t wb_group : 3;
              uint64_t reserved1 : 41;
          } data;
      } write_back_cfg;

      /* Word 9: shift and argument values for w and x. */
      union
      {
          uint64_t reg;
          struct
          {
              uint64_t shr_w : 4;
              uint64_t shr_x : 4;
              uint64_t arg_w : 24;
              uint64_t arg_x : 24;
              uint64_t reserved0 : 8;
          } data;
      } conv_value;

      /* Word 10: 40-bit additive argument. */
      union
      {
          uint64_t reg;
          struct
          {
              uint64_t arg_add : 40;
              uint64_t reserved : 24;
          } data;
      } conv_value2;

      /* Word 11: DMA output parameters. */
      union
      {
          uint64_t reg;
          struct
          {
              uint64_t send_data_out : 1;
              uint64_t reserved : 15;
              uint64_t channel_byte_num : 16;
              uint64_t dma_total_byte : 32;
          } data;
      } dma_parameter;
  } kpu_layer_argument_t;
  /**
   * @brief Activation table: 16 parameter entries followed by two bias words.
   *
   * Every member is a 64-bit union with a raw `reg` view and a structured
   * `data` view. The two bias words each expose eight `uint8_t` biases.
   */
  typedef struct
  {
      /* 16 entries; the bit-fields use 60 of the 64 bits. */
      union
      {
          uint64_t reg;
          struct
          {
              uint64_t shift_number : 8;
              uint64_t y_mul : 16;
              uint64_t x_start : 36;
          } data;
      } activate_para[16];

      union
      {
          uint64_t reg;
          struct
          {
              uint8_t result_bias[8];
          } data;
      } activate_para_bias0;

      union
      {
          uint64_t reg;
          struct
          {
              uint8_t result_bias[8];
          } data;
      } activate_para_bias1;
  } kpu_activate_table_t;
  /**
   * @brief One batch-normalisation argument word.
   *
   * A 64-bit union: raw `reg`, or the bit-fields norm_mul (24 bits),
   * norm_add (32 bits) and norm_shift (4 bits); the top 4 bits are unnamed.
   */
  typedef struct
  {
      union
      {
          uint64_t reg;
          struct
          {
              uint64_t norm_mul : 24;
              uint64_t norm_add : 32;
              uint64_t norm_shift : 4;
          } data;
      } batchnorm;
  } kpu_batchnorm_argument_t;
  /**
   * @brief Nine 16-bit weights overlaid on a 64-bit `reg` word.
   *
   * NOTE(review): the `data` view is 18 bytes while `reg` is only 8, so `reg`
   * aliases just the first four weights and the union is larger than one
   * 64-bit word. Layout is kept as-is because callers may depend on it.
   */
  typedef struct
  {
      union
      {
          uint64_t reg;
          struct
          {
              uint16_t weight[9];
          } data;
      } weights;
  } kpu_weights_kernel_16_3x3_t;
  /**
   * @brief Bit-field layout shared by the interrupt status / raw / mask / clear
   *        words of kpu_config_t: three 1-bit flags plus 61 reserved bits.
   */
  typedef struct
  {
      uint64_t calc_done_int : 1;
      uint64_t layer_cfg_almost_empty_int : 1;
      uint64_t layer_cfg_almost_full_int : 1;
      uint64_t reserved : 61;
  } kpu_config_interrupt_t;
  /**
   * @brief FIFO threshold word: 4-bit full threshold, 4-bit empty threshold,
   *        56 reserved bits.
   */
  typedef struct
  {
      uint64_t fifo_full_threshold : 4;
      uint64_t fifo_empty_threshold : 4;
      uint64_t reserved : 56;
  } kpu_config_fifo_threshold_t;
  /**
   * @brief FIFO control word: five 1-bit `*_flush_n` flags plus 59 reserved bits.
   *
   * NOTE(review): the `_n` suffix suggests active-low flush lines - confirm
   * against the hardware documentation.
   */
  typedef struct
  {
      uint64_t dma_fifo_flush_n : 1;
      uint64_t gs_fifo_flush_n : 1;
      uint64_t cfg_fifo_flush_n : 1;
      uint64_t cmd_fifo_flush_n : 1;
      uint64_t resp_fifo_flush_n : 1;
      uint64_t reserved : 59;
  } kpu_config_fifo_ctrl_t;
  /**
   * @brief Mode word: a single `eight_bit_mode` flag plus 63 reserved bits.
   */
  typedef struct
  {
      uint64_t eight_bit_mode : 1;
      uint64_t reserved : 63;
  } kpu_config_eight_bit_mode_t;
  255. typedef struct
  256. {
  257. volatile uint64_t layer_argument_fifo;
  258. volatile union
  259. {
  260. uint64_t reg;
  261. kpu_config_interrupt_t data;
  262. } interrupt_status;
  263. volatile union
  264. {
  265. uint64_t reg;
  266. kpu_config_interrupt_t data;
  267. } interrupt_raw;
  268. volatile union
  269. {
  270. uint64_t reg;
  271. kpu_config_interrupt_t data;
  272. } interrupt_mask;
  273. volatile union
  274. {
  275. uint64_t reg;
  276. kpu_config_interrupt_t data;
  277. } interrupt_clear;
  278. volatile union
  279. {
  280. uint64_t reg;
  281. kpu_config_fifo_threshold_t data;
  282. } fifo_threshold;
  283. volatile uint64_t fifo_data_out;
  284. volatile union
  285. {
  286. uint64_t reg;
  287. kpu_config_fifo_ctrl_t data;
  288. } fifo_ctrl;
  289. volatile union
  290. {
  291. uint64_t reg;
  292. kpu_config_eight_bit_mode_t data;
  293. } eight_bit_mode;
  294. } kpu_config_t;
  295. typedef struct
  296. {
  297. kpu_layer_argument_t *layers;
  298. kpu_layer_argument_t *remain_layers;
  299. plic_irq_callback_t callback;
  300. void *ctx;
  301. uint64_t *src;
  302. uint64_t *dst;
  303. uint32_t src_length;
  304. uint32_t dst_length;
  305. uint32_t layers_length;
  306. uint32_t remain_layers_length;
  307. dmac_channel_number_t dma_ch;
  308. uint32_t eight_bit_mode;
  309. float output_scale;
  310. float output_bias;
  311. float input_scale;
  312. float input_bias;
  313. } kpu_task_t;
  /**
   * @brief kmodel header: seven 32-bit fields (28 bytes, no padding).
   */
  typedef struct
  {
      uint32_t version;
      uint32_t flags;
      uint32_t arch;
      uint32_t layers_length;
      uint32_t max_start_address;
      uint32_t main_mem_usage;
      uint32_t output_count;
  } kpu_kmodel_header_t;
  /**
   * @brief Model header: five 32-bit fields (20 bytes, no padding).
   */
  typedef struct
  {
      uint32_t version;
      uint32_t flags;
      uint32_t layers_length;
      uint32_t max_start_address;
      uint32_t layers_argument_start;
  } kpu_model_header_t;
  /**
   * @brief Descriptor of one model output: a 32-bit address and a 32-bit size.
   */
  typedef struct
  {
      uint32_t address;
      uint32_t size;
  } kpu_model_output_t;
  /**
   * @brief Layer type identifiers.
   *
   * Values 0..25 are assigned sequentially starting at KL_INVALID; the
   * KL_K210_* identifiers form a second sequential range starting at 10240.
   * The numeric values must stay stable because kpu_model_layer_header_t
   * stores the type as a raw uint32_t.
   */
  typedef enum
  {
      KL_INVALID = 0,
      KL_ADD,
      KL_QUANTIZED_ADD,
      KL_GLOBAL_MAX_POOL2D,
      KL_QUANTIZED_GLOBAL_MAX_POOL2D,
      KL_GLOBAL_AVERAGE_POOL2D,
      KL_QUANTIZED_GLOBAL_AVERAGE_POOL2D,
      KL_MAX_POOL2D,
      KL_QUANTIZED_MAX_POOL2D,
      KL_AVERAGE_POOL2D,
      KL_QUANTIZED_AVERAGE_POOL2D,
      KL_QUANTIZE,
      KL_DEQUANTIZE,
      KL_REQUANTIZE,
      KL_L2_NORMALIZATION,
      KL_SOFTMAX,
      KL_CONCAT,
      KL_QUANTIZED_CONCAT,
      KL_FULLY_CONNECTED,
      KL_QUANTIZED_FULLY_CONNECTED,
      KL_TENSORFLOW_FLATTEN,
      KL_QUANTIZED_TENSORFLOW_FLATTEN,
      KL_RESIZE_NEAREST_NEIGHBOR,
      KL_QUANTIZED_RESIZE_NEAREST_NEIGHBOR,
      KL_CHANNELWISE_DEQUANTIZE,
      KL_LOGISTIC,
      KL_K210_CONV = 10240,
      KL_K210_ADD_PADDING,
      KL_K210_REMOVE_PADDING,
      KL_K210_UPLOAD
  } kpu_model_layer_type_t;
  /**
   * @brief Per-layer header: a 32-bit type and the 32-bit size of the body.
   *
   * NOTE(review): `type` presumably holds a kpu_model_layer_type_t value -
   * confirm against the loader.
   */
  typedef struct
  {
      uint32_t type;
      uint32_t body_size;
  } kpu_model_layer_header_t;
  /**
   * @brief Layer flag values: none (0) or "main memory out" (1).
   */
  typedef enum
  {
      KLF_NONE = 0,
      KLF_MAIN_MEM_OUT = 1
  } kpu_model_layer_flags_t;
  /**
   * @brief Padding modes: SAME (0) or VALID (1).
   */
  typedef enum
  {
      KLP_SAME = 0,
      KLP_VALID = 1
  } kpu_model_padding_t;
  /**
   * @brief Activation kinds: linear (0), ReLU (1), ReLU6 (2).
   */
  typedef enum
  {
      KLA_LINEAR = 0,
      KLA_RELU = 1,
      KLA_RELU6 = 2
  } kpu_model_activation_t;
  /**
   * @brief Quantisation parameter pair (scale, bias) stored as two floats.
   */
  typedef struct
  {
      float scale;
      float bias;
  } kpu_model_quant_param_t;
  /**
   * @brief Tensor shape as width / height / channels, 32 bits each.
   */
  typedef struct
  {
      uint32_t width;
      uint32_t height;
      uint32_t channels;
  } kpu_model_shape_t;
  /**
   * @brief Memory range described by a 32-bit start and a 32-bit size.
   */
  typedef struct
  {
      uint32_t start;
      uint32_t size;
  } kpu_model_memory_range_t;
  /**
   * @brief Arguments of a KL_K210_CONV layer body: flags, output address and
   *        four offsets (layer, weights, batch-norm, activation).
   *
   * NOTE(review): what the offsets are relative to is not visible here -
   * confirm against the loader.
   */
  typedef struct
  {
      uint32_t flags;
      uint32_t main_mem_out_address;
      uint32_t layer_offset;
      uint32_t weights_offset;
      uint32_t bn_offset;
      uint32_t act_offset;
  } kpu_model_conv_layer_argument_t;
  /**
   * @brief Arguments of an add layer: two input addresses, one output address
   *        and an element count.
   */
  typedef struct
  {
      uint32_t flags;
      uint32_t main_mem_in_a_address;
      uint32_t main_mem_in_b_address;
      uint32_t main_mem_out_address;
      uint32_t count;
  } kpu_model_add_layer_argument_t;
  /**
   * @brief Arguments of a quantized add layer.
   *
   * Same leading fields as kpu_model_add_layer_argument_t, followed by a
   * signed (offset, mul, shift) triple for input A, input B and the output.
   */
  typedef struct
  {
      uint32_t flags;
      uint32_t main_mem_in_a_address;
      uint32_t main_mem_in_b_address;
      uint32_t main_mem_out_address;
      uint32_t count;
      int32_t in_a_offset;
      int32_t in_a_mul;
      int32_t in_a_shift;
      int32_t in_b_offset;
      int32_t in_b_mul;
      int32_t in_b_shift;
      int32_t out_offset;
      int32_t out_mul;
      int32_t out_shift;
  } kpu_model_quant_add_layer_argument_t;
  /**
   * @brief Arguments of a global-pool 2D layer: input / output addresses,
   *        kernel size and channel count.
   */
  typedef struct
  {
      uint32_t flags;
      uint32_t main_mem_in_address;
      uint32_t main_mem_out_address;
      uint32_t kernel_size;
      uint32_t channels;
  } kpu_model_gap2d_layer_argument_t;
  449. typedef struct
  450. {
  451. uint32_t flags;
  452. uint32_t main_mem_in_address;
  453. uint32_t main_mem_out_address;
  454. kpu_model_shape_t in_shape;
  455. kpu_model_shape_t out_shape;
  456. uint32_t kernel_width;
  457. uint32_t kernel_height;
  458. uint32_t stride_width;
  459. uint32_t stride_height;
  460. uint32_t padding_width;
  461. uint32_t padding_height;
  462. } kpu_model_quant_max_pool2d_layer_argument_t;
  463. typedef struct
  464. {
  465. uint32_t flags;
  466. uint32_t main_mem_in_address;
  467. uint32_t main_mem_out_address;
  468. kpu_model_shape_t in_shape;
  469. kpu_model_shape_t out_shape;
  470. uint32_t kernel_width;
  471. uint32_t kernel_height;
  472. uint32_t stride_width;
  473. uint32_t stride_height;
  474. uint32_t padding_width;
  475. uint32_t padding_height;
  476. kpu_model_activation_t act;
  477. } kpu_model_ave_pool2d_layer_argument_t;
  478. typedef struct
  479. {
  480. uint32_t flags;
  481. uint32_t main_mem_in_address;
  482. uint32_t mem_out_address;
  483. uint32_t count;
  484. kpu_model_quant_param_t quant_param;
  485. } kpu_model_quantize_layer_argument_t;
  486. typedef struct
  487. {
  488. uint32_t flags;
  489. uint32_t main_mem_in_address;
  490. uint32_t main_mem_out_address;
  491. uint32_t count;
  492. kpu_model_quant_param_t quant_param;
  493. } kpu_model_dequantize_layer_argument_t;
  /**
   * @brief Arguments of a requantize layer: I/O addresses, element count and a
   *        256-entry byte table.
   *
   * NOTE(review): the table presumably maps each possible input byte to its
   * requantized value - confirm against the implementation.
   */
  typedef struct
  {
      uint32_t flags;
      uint32_t main_mem_in_address;
      uint32_t main_mem_out_address;
      uint32_t count;
      uint8_t table[256];
  } kpu_model_requantize_layer_argument_t;
  /**
   * @brief Arguments of a KL_K210_ADD_PADDING layer: main-memory input address,
   *        KPU-memory output address and channel count.
   */
  typedef struct
  {
      uint32_t flags;
      uint32_t main_mem_in_address;
      uint32_t kpu_mem_out_address;
      uint32_t channels;
  } kpu_model_add_padding_layer_argument_t;
  /**
   * @brief Arguments of a KL_K210_REMOVE_PADDING layer: input / output
   *        addresses in main memory and channel count.
   */
  typedef struct
  {
      uint32_t flags;
      uint32_t main_mem_in_address;
      uint32_t main_mem_out_address;
      uint32_t channels;
  } kpu_model_remove_padding_layer_argument_t;
  /**
   * @brief Arguments of a KL_K210_UPLOAD layer: main-memory input address,
   *        KPU-memory output address and the width / height / channels of the data.
   */
  typedef struct
  {
      uint32_t flags;
      uint32_t main_mem_in_address;
      uint32_t kpu_mem_out_address;
      uint32_t width;
      uint32_t height;
      uint32_t channels;
  } kpu_model_upload_layer_argument_t;
  /**
   * @brief Arguments of an L2-normalisation layer: I/O addresses and channel count.
   */
  typedef struct
  {
      uint32_t flags;
      uint32_t main_mem_in_address;
      uint32_t main_mem_out_address;
      uint32_t channels;
  } kpu_model_l2_norm_layer_argument_t;
  /**
   * @brief Arguments of a softmax layer: I/O addresses and channel count.
   */
  typedef struct
  {
      uint32_t flags;
      uint32_t main_mem_in_address;
      uint32_t main_mem_out_address;
      uint32_t channels;
  } kpu_model_softmax_layer_argument_t;
  539. typedef struct
  540. {
  541. uint32_t flags;
  542. uint32_t main_mem_out_address;
  543. uint32_t input_count;
  544. kpu_model_memory_range_t inputs_mem[0];
  545. } kpu_model_concat_layer_argument_t;
  546. typedef struct
  547. {
  548. uint32_t flags;
  549. uint32_t main_mem_in_address;
  550. uint32_t main_mem_out_address;
  551. uint32_t in_channels;
  552. uint32_t out_channels;
  553. kpu_model_activation_t act;
  554. float weights[0];
  555. } kpu_model_fully_connected_layer_argument_t;
  556. typedef struct
  557. {
  558. uint32_t flags;
  559. uint32_t main_mem_in_address;
  560. uint32_t main_mem_out_address;
  561. kpu_model_shape_t shape;
  562. } kpu_model_tf_flatten_layer_argument_t;
  563. typedef struct
  564. {
  565. uint32_t flags;
  566. uint32_t main_mem_in_address;
  567. uint32_t main_mem_out_address;
  568. kpu_model_shape_t in_shape;
  569. uint32_t out_width;
  570. uint32_t out_height;
  571. uint32_t align_corners;
  572. } kpu_model_resize_nearest_neighbor_layer_argument_t;
  573. typedef struct
  574. {
  575. uint32_t flags;
  576. uint32_t main_mem_in_address;
  577. uint32_t main_mem_out_address;
  578. kpu_model_shape_t in_shape;
  579. uint32_t out_width;
  580. uint32_t out_height;
  581. uint32_t align_corners;
  582. } kpu_model_quant_resize_nearest_neighbor_layer_argument_t;
  583. typedef struct
  584. {
  585. uint32_t flags;
  586. uint32_t main_mem_in_address;
  587. uint32_t main_mem_out_address;
  588. uint32_t channels;
  589. uint32_t channel_size;
  590. kpu_model_quant_param_t quant_params[0];
  591. } kpu_model_channelwise_dequant_argument_t;
  /**
   * @brief Arguments of a logistic layer: I/O addresses and channel count.
   */
  typedef struct
  {
      uint32_t flags;
      uint32_t main_mem_in_address;
      uint32_t main_mem_out_address;
      uint32_t channels;
  } kpu_model_logistic_layer_argument_t;
  /* Completion callback type: receives the caller-supplied userdata pointer
   * and returns nothing. */
  typedef void (*kpu_done_callback_t)(void *userdata);
  600. typedef struct
  601. {
  602. int is_nncase;
  603. union
  604. {
  605. struct
  606. {
  607. const uint8_t *model_buffer;
  608. uint8_t *main_buffer;
  609. uint32_t output_count;
  610. const kpu_model_output_t *outputs;
  611. const kpu_model_layer_header_t *layer_headers;
  612. const uint8_t *body_start;
  613. uint32_t layers_length;
  614. volatile uint32_t current_layer;
  615. const uint8_t *volatile current_body;
  616. dmac_channel_number_t dma_ch;
  617. kpu_done_callback_t done_callback;
  618. void *userdata;
  619. };
  620. struct
  621. {
  622. void* nncase_ctx;
  623. uint32_t nncase_version;
  624. };
  625. };
  626. } kpu_model_context_t;
  /**
   * @brief Per-layer metadata: three 32-bit offsets followed by input / output
   *        scale and bias floats.
   *
   * The member name `weigths_offset` is misspelled in the public interface and
   * is kept verbatim for source compatibility.
   */
  typedef struct
  {
      uint32_t weigths_offset;
      uint32_t bn_offset;
      uint32_t act_offset;
      float input_scale;
      float input_bias;
      float output_scale;
      float output_bias;
  } kpu_model_layer_metadata_t;
  /**
   * @brief Quantisation parameter pair (scale, bias) used by the helper
   *        functions declared in this header.
   */
  typedef struct _quantize_param
  {
      float scale;
      float bias;
  } quantize_param_t;
  642. extern volatile kpu_config_t *const kpu;
  643. /**
  644. * @brief Modle complier init kpu handler
  645. *
  646. * @param[in] task Kpu handler
  647. *
  648. * @return Kpu handler
  649. */
  650. extern kpu_task_t *kpu_task_init(kpu_task_t *task);
  651. /**
  652. * @brief Kpu run for AI
  653. *
  654. * @param[in] task Kpu handler
  655. * @param[in] dma_ch DMA for kpu
  656. * @param[in] src The picture data
  657. * @param[in] dest The result of kpu
  658. * @param[in] callback The callback of kpu
  659. *
  660. * @return result
  661. * - 0 Success
  662. * - Other Fail.Kpu is busy.
  663. */
  664. int kpu_run(kpu_task_t *task, dmac_channel_number_t dma_ch, const void *src, void *dest, plic_irq_callback_t callback);
  665. /**
  666. * @brief Get kpu result buf
  667. *
  668. * @param[in] task Kpu handler
  669. *
  670. * @return Kpu result buf
  671. */
  672. uint8_t *kpu_get_output_buf(kpu_task_t *task);
  /**
   * @brief       Release kpu output buf
   *
   * @param[in]   output_buf      Kpu output buf
   *
   */
  void kpu_release_output_buf(uint8_t *output_buf);
  680. /**
  681. * @brief Kpu run for AI
  682. *
  683. * @param[in] task Kpu handler
  684. *
  685. * @return result
  686. * - 0 Success
  687. * - Other Fail.Kpu is busy.
  688. */
  689. int kpu_start(kpu_task_t *task);
  690. /**
  691. * @brief Initialize kpu handler
  692. *
  693. * @param[in] task Kpu handler
  694. *
  695. * @return result
  696. * - 0 Success
  697. * - Other Fail.
  698. */
  699. int kpu_single_task_init(kpu_task_t *task);
  700. /**
  701. * @brief Uninitialize kpu handler
  702. *
  703. * @param[in] task Kpu handler
  704. *
  705. * @return result
  706. * - 0 Success
  707. * - Other Fail.
  708. */
  709. int kpu_single_task_deinit(kpu_task_t *task);
  710. /**
  711. * @brief Load kmodel and init kpu task
  712. *
  713. * @param[in] task Kpu handler
  714. * @param[in] buffer Kmodel
  715. * @param[in] meta Test data
  716. *
  717. * @return result
  718. * - 0 Success
  719. * - Other Fail.
  720. */
  721. int kpu_model_load_from_buffer(kpu_task_t *task, uint8_t *buffer, kpu_model_layer_metadata_t **meta);
  722. /**
  723. * @brief Kpu initialize
  724. *
  725. * @param[in] eight_bit_mode 0:16bit mode 1:8bit mode
  726. * @param[in] callback Callback of kpu
  727. * @param[in] userdata Data of callback
  728. *
  729. */
  730. void kpu_init(int eight_bit_mode, plic_irq_callback_t callback, void *userdata);
  731. /**
  732. * @brief Kpu input data by dma
  733. *
  734. * @param[in] layer Kpu task layer
  735. * @param[in] src Image data
  736. * @param[in] dma_ch Dmac channel
  737. * @param[in] callback Dmac complete callback
  738. * @param[in] userdata Data of callback
  739. *
  740. */
  741. void kpu_input_dma(const kpu_layer_argument_t *layer, const uint8_t *src, dmac_channel_number_t dma_ch, plic_irq_callback_t callback, void *userdata);
  742. /**
  743. * @brief Kpu input data by cpu
  744. *
  745. * @param[in] layer Kpu task layer
  746. * @param[in] src Image data
  747. * @param[in] width Image width
  748. * @param[in] height Image heigth
  749. * @param[in] channels Color channel, RGB is 3
  750. *
  751. */
  752. void kpu_input_with_padding(kpu_layer_argument_t *layer, const uint8_t *src, int width, int height, int channels);
  753. /**
  754. * @brief Kpu run only one layer
  755. *
  756. * @param[in] layer Kpu task layer
  757. *
  758. */
  759. void kpu_conv2d(kpu_layer_argument_t *layer);
  760. /**
  761. * @brief Kpu run only one layer then get the result by dma
  762. *
  763. * @param[in] layer Kpu task layer
  764. * @param[in] dma_ch Dmac channel
  765. * @param[in] dest Result
  766. * @param[in] callback Dmac complete callback
  767. * @param[in] userdata Data of callback
  768. *
  769. */
  770. void kpu_conv2d_output(kpu_layer_argument_t *layer, dmac_channel_number_t dma_ch, uint8_t *dest, plic_irq_callback_t callback, void *userdata);
  771. /**
  772. * @brief Kpu pooling
  773. *
  774. * @param[in] src Source
  775. * @param[in] src_param Source param
  776. * @param[in] kernel_size Kernel size, 7*7 is 49
  777. * @param[in] channels Channels
  778. * @param[in] dest Dest
  779. * @param[in] dest_param Dest param
  780. *
  781. */
  782. void kpu_global_average_pool(const uint8_t *src, const quantize_param_t *src_param, int kernel_size, int channels, uint8_t *dest, const quantize_param_t *dest_param);
  783. /**
  784. * @brief Kpu pooling
  785. *
  786. * @param[in] src Source
  787. * @param[in] src_param Source param
  788. * @param[in] kernel_size Kernel size, 7*7 is 49
  789. * @param[in] channels Channels
  790. * @param[in] dest Dest
  791. *
  792. */
  793. void kpu_global_average_pool_float(const uint8_t *src, const quantize_param_t *src_param, int kernel_size, int channels, float *dest);
  /**
   * @brief       Kpu fully connected by cpu
   *
   * @param[in]   src             Source
   * @param[in]   weights         Weight
   * @param[in]   biases          Biases
   * @param[out]  dest            Dest
   * @param[in]   input_channels  Input channels
   * @param[in]   output_channels Output channels
   *
   */
  void kpu_fully_connected(const float *src, const float *weights, const float *biases, float *dest, int input_channels, int output_channels);
  806. /**
  807. * @brief Kpu matrix multiplication
  808. *
  809. * @param[in] src Source
  810. * @param[in] channels Channels
  811. * @param[in] dest Dest
  812. * @param[in] dest_param Dest param
  813. *
  814. */
  815. void kpu_matmul_end(const uint8_t *src, int channels, float *dest, const quantize_param_t *dest_param);
  816. /**
  817. * @brief Kpu dequantize
  818. *
  819. * @param[in] src Source
  820. * @param[in] src_param Source param
  821. * @param[in] count Dequantize count
  822. * @param[in] dest Dest
  823. *
  824. */
  825. void kpu_dequantize(const uint8_t *src, const quantize_param_t *src_param, size_t count, float *dest);
  826. /**
  827. * @brief Kpu load kmodel
  828. *
  829. * @param[in] ctx Kmodel object
  830. * @param[in] buffer Kmodel buffer
  831. *
  832. * @return result
  833. * - 0 Success
  834. * - Other Fail.
  835. */
  836. int kpu_load_kmodel(kpu_model_context_t *ctx, const uint8_t *buffer);
  837. /**
  838. * @brief Kpu free kmodel buffer
  839. *
  840. * @param[in] ctx kmodel object
  841. *
  842. */
  843. void kpu_model_free(kpu_model_context_t *ctx);
  844. /**
  845. * @brief Kpu load kmodel
  846. *
  847. * @param[in] ctx Kmodel object
  848. * @param[in] index Output index
  849. * @param[in] data Output data
  850. * @param[in] size Output data size
  851. *
  852. * @return result
  853. * - 0 Success
  854. * - Other Fail.
  855. */
  856. int kpu_get_output(kpu_model_context_t *ctx, uint32_t index, uint8_t **data, size_t *size);
  857. /**
  858. * @brief Kpu run kmodel
  859. *
  860. * @param[in] ctx Kmodel object
  861. * @param[in] src Source data
  862. * @param[in] dma_ch Dma channel
  863. * @param[in] done_callback Kpu complete callback
  864. * @param[in] userdata Data of callback
  865. *
  866. * @return result
  867. * - 0 Success
  868. * - Other Fail.
  869. */
  870. int kpu_run_kmodel(kpu_model_context_t *ctx, const uint8_t *src, dmac_channel_number_t dma_ch, kpu_done_callback_t done_callback, void *userdata);
  871. #ifdef __cplusplus
  872. }
  873. #endif
  874. #endif