mat.h 66 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843
  1. // Tencent is pleased to support the open source community by making ncnn available.
  2. //
  3. // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
  4. //
  5. // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6. // in compliance with the License. You may obtain a copy of the License at
  7. //
  8. // https://opensource.org/licenses/BSD-3-Clause
  9. //
  10. // Unless required by applicable law or agreed to in writing, software distributed
  11. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  12. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  13. // specific language governing permissions and limitations under the License.
  14. #ifndef NCNN_MAT_H
  15. #define NCNN_MAT_H
  16. #include <stdlib.h>
  17. #include <string.h>
  18. #if __ARM_NEON
  19. #include <arm_neon.h>
  20. #endif
  21. #if __SSE2__
  22. #include <emmintrin.h>
  23. #if __AVX__
  24. #include <immintrin.h>
  25. #endif
  26. #endif
  27. #if __mips_msa
  28. #include <msa.h>
  29. #endif
  30. #if __loongarch_sx
  31. #include <lsxintrin.h>
  32. #endif
  33. #if __riscv_vector
  34. #include <riscv_vector.h>
  35. #include "cpu.h" // cpu_riscv_vlenb()
  36. #endif
  37. #include "allocator.h"
  38. #include "option.h"
  39. #include "platform.h"
  40. #if NCNN_VULKAN
  41. #include <vulkan/vulkan.h>
  42. #endif // NCNN_VULKAN
  43. #if NCNN_PIXEL
  44. #if NCNN_PLATFORM_API
  45. #if __ANDROID_API__ >= 9
  46. #include <android/bitmap.h>
  47. #include <jni.h>
  48. #endif // __ANDROID_API__ >= 9
  49. #endif // NCNN_PLATFORM_API
  50. #endif // NCNN_PIXEL
  51. namespace ncnn {
  52. #if NCNN_VULKAN
  53. class VkMat;
  54. class VkImageMat;
  55. #endif // NCNN_VULKAN
  56. // the three dimension matrix
  57. class NCNN_EXPORT Mat
  58. {
  59. public:
  60. // empty
  61. Mat();
  62. // vec
  63. Mat(int w, size_t elemsize = 4u, Allocator* allocator = 0);
  64. // image
  65. Mat(int w, int h, size_t elemsize = 4u, Allocator* allocator = 0);
  66. // dim
  67. Mat(int w, int h, int c, size_t elemsize = 4u, Allocator* allocator = 0);
  68. // cube
  69. Mat(int w, int h, int d, int c, size_t elemsize = 4u, Allocator* allocator = 0);
  70. // packed vec
  71. Mat(int w, size_t elemsize, int elempack, Allocator* allocator = 0);
  72. // packed image
  73. Mat(int w, int h, size_t elemsize, int elempack, Allocator* allocator = 0);
  74. // packed dim
  75. Mat(int w, int h, int c, size_t elemsize, int elempack, Allocator* allocator = 0);
  76. // packed cube
  77. Mat(int w, int h, int d, int c, size_t elemsize, int elempack, Allocator* allocator = 0);
  78. // copy
  79. Mat(const Mat& m);
  80. // external vec
  81. Mat(int w, void* data, size_t elemsize = 4u, Allocator* allocator = 0);
  82. // external image
  83. Mat(int w, int h, void* data, size_t elemsize = 4u, Allocator* allocator = 0);
  84. // external dim
  85. Mat(int w, int h, int c, void* data, size_t elemsize = 4u, Allocator* allocator = 0);
  86. // external cube
  87. Mat(int w, int h, int d, int c, void* data, size_t elemsize = 4u, Allocator* allocator = 0);
  88. // external packed vec
  89. Mat(int w, void* data, size_t elemsize, int elempack, Allocator* allocator = 0);
  90. // external packed image
  91. Mat(int w, int h, void* data, size_t elemsize, int elempack, Allocator* allocator = 0);
  92. // external packed dim
  93. Mat(int w, int h, int c, void* data, size_t elemsize, int elempack, Allocator* allocator = 0);
  94. // external packed cube
  95. Mat(int w, int h, int d, int c, void* data, size_t elemsize, int elempack, Allocator* allocator = 0);
  96. // release
  97. ~Mat();
  98. // assign
  99. Mat& operator=(const Mat& m);
  100. // set all
  101. void fill(float v);
  102. void fill(int v);
  103. #if __ARM_NEON
  104. void fill(float32x4_t _v);
  105. void fill(uint16x4_t _v);
  106. void fill(int32x4_t _v);
  107. void fill(int32x4_t _v0, int32x4_t _v1);
  108. #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
  109. void fill(float16x4_t _v);
  110. void fill(float16x8_t _v);
  111. #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
  112. #endif // __ARM_NEON
  113. #if __SSE2__
  114. #if __AVX__
  115. #if __AVX512F__
  116. void fill(__m512 _v);
  117. #endif // __AVX512F__
  118. void fill(__m256 _v, int i = 0);
  119. #endif // __AVX__
  120. void fill(__m128 _v);
  121. void fill(__m128i _v);
  122. #endif // __SSE2__
  123. #if __mips_msa
  124. void fill(v4f32 _v);
  125. #endif // __mips_msa
  126. #if __loongarch_sx
  127. void fill(__m128 _v);
  128. #endif //__loongarch_sx
  129. #if __riscv_vector
  130. void fill(vfloat32m1_t _v);
  131. void fill(vuint16m1_t _v);
  132. void fill(vint8m1_t _v);
  133. #if __riscv_zfh
  134. void fill(vfloat16m1_t _v);
  135. #endif // __riscv_zfh
  136. #endif // __riscv_vector
  137. template<typename T>
  138. void fill(T v);
  139. // deep copy
  140. Mat clone(Allocator* allocator = 0) const;
  141. // deep copy from other mat, inplace
  142. void clone_from(const ncnn::Mat& mat, Allocator* allocator = 0);
  143. // reshape vec
  144. Mat reshape(int w, Allocator* allocator = 0) const;
  145. // reshape image
  146. Mat reshape(int w, int h, Allocator* allocator = 0) const;
  147. // reshape dim
  148. Mat reshape(int w, int h, int c, Allocator* allocator = 0) const;
  149. // reshape cube
  150. Mat reshape(int w, int h, int d, int c, Allocator* allocator = 0) const;
  151. // allocate vec
  152. void create(int w, size_t elemsize = 4u, Allocator* allocator = 0);
  153. // allocate image
  154. void create(int w, int h, size_t elemsize = 4u, Allocator* allocator = 0);
  155. // allocate dim
  156. void create(int w, int h, int c, size_t elemsize = 4u, Allocator* allocator = 0);
  157. // allocate cube
  158. void create(int w, int h, int d, int c, size_t elemsize = 4u, Allocator* allocator = 0);
  159. // allocate packed vec
  160. void create(int w, size_t elemsize, int elempack, Allocator* allocator = 0);
  161. // allocate packed image
  162. void create(int w, int h, size_t elemsize, int elempack, Allocator* allocator = 0);
  163. // allocate packed dim
  164. void create(int w, int h, int c, size_t elemsize, int elempack, Allocator* allocator = 0);
  165. // allocate packed cube
  166. void create(int w, int h, int d, int c, size_t elemsize, int elempack, Allocator* allocator = 0);
  167. // allocate like
  168. void create_like(const Mat& m, Allocator* allocator = 0);
  169. #if NCNN_VULKAN
  170. // allocate like
  171. void create_like(const VkMat& m, Allocator* allocator = 0);
  172. // allocate like
  173. void create_like(const VkImageMat& im, Allocator* allocator = 0);
  174. #endif // NCNN_VULKAN
  175. // refcount++
  176. void addref();
  177. // refcount--
  178. void release();
  179. bool empty() const;
  180. size_t total() const;
  181. // bits per element
  182. int elembits() const;
  183. // shape only
  184. Mat shape() const;
  185. // data reference
  186. Mat channel(int c);
  187. const Mat channel(int c) const;
  188. Mat depth(int z);
  189. const Mat depth(int z) const;
  190. float* row(int y);
  191. const float* row(int y) const;
  192. template<typename T>
  193. T* row(int y);
  194. template<typename T>
  195. const T* row(int y) const;
  196. // range reference
  197. Mat channel_range(int c, int channels);
  198. const Mat channel_range(int c, int channels) const;
  199. Mat depth_range(int z, int depths);
  200. const Mat depth_range(int z, int depths) const;
  201. Mat row_range(int y, int rows);
  202. const Mat row_range(int y, int rows) const;
  203. Mat range(int x, int n);
  204. const Mat range(int x, int n) const;
  205. // access raw data
  206. template<typename T>
  207. operator T*();
  208. template<typename T>
  209. operator const T*() const;
  210. // convenient access float vec element
  211. float& operator[](size_t i);
  212. const float& operator[](size_t i) const;
  213. #if NCNN_PIXEL
  214. enum PixelType
  215. {
  216. PIXEL_CONVERT_SHIFT = 16,
  217. PIXEL_FORMAT_MASK = 0x0000ffff,
  218. PIXEL_CONVERT_MASK = 0xffff0000,
  219. PIXEL_RGB = 1,
  220. PIXEL_BGR = 2,
  221. PIXEL_GRAY = 3,
  222. PIXEL_RGBA = 4,
  223. PIXEL_BGRA = 5,
  224. PIXEL_RGB2BGR = PIXEL_RGB | (PIXEL_BGR << PIXEL_CONVERT_SHIFT),
  225. PIXEL_RGB2GRAY = PIXEL_RGB | (PIXEL_GRAY << PIXEL_CONVERT_SHIFT),
  226. PIXEL_RGB2RGBA = PIXEL_RGB | (PIXEL_RGBA << PIXEL_CONVERT_SHIFT),
  227. PIXEL_RGB2BGRA = PIXEL_RGB | (PIXEL_BGRA << PIXEL_CONVERT_SHIFT),
  228. PIXEL_BGR2RGB = PIXEL_BGR | (PIXEL_RGB << PIXEL_CONVERT_SHIFT),
  229. PIXEL_BGR2GRAY = PIXEL_BGR | (PIXEL_GRAY << PIXEL_CONVERT_SHIFT),
  230. PIXEL_BGR2RGBA = PIXEL_BGR | (PIXEL_RGBA << PIXEL_CONVERT_SHIFT),
  231. PIXEL_BGR2BGRA = PIXEL_BGR | (PIXEL_BGRA << PIXEL_CONVERT_SHIFT),
  232. PIXEL_GRAY2RGB = PIXEL_GRAY | (PIXEL_RGB << PIXEL_CONVERT_SHIFT),
  233. PIXEL_GRAY2BGR = PIXEL_GRAY | (PIXEL_BGR << PIXEL_CONVERT_SHIFT),
  234. PIXEL_GRAY2RGBA = PIXEL_GRAY | (PIXEL_RGBA << PIXEL_CONVERT_SHIFT),
  235. PIXEL_GRAY2BGRA = PIXEL_GRAY | (PIXEL_BGRA << PIXEL_CONVERT_SHIFT),
  236. PIXEL_RGBA2RGB = PIXEL_RGBA | (PIXEL_RGB << PIXEL_CONVERT_SHIFT),
  237. PIXEL_RGBA2BGR = PIXEL_RGBA | (PIXEL_BGR << PIXEL_CONVERT_SHIFT),
  238. PIXEL_RGBA2GRAY = PIXEL_RGBA | (PIXEL_GRAY << PIXEL_CONVERT_SHIFT),
  239. PIXEL_RGBA2BGRA = PIXEL_RGBA | (PIXEL_BGRA << PIXEL_CONVERT_SHIFT),
  240. PIXEL_BGRA2RGB = PIXEL_BGRA | (PIXEL_RGB << PIXEL_CONVERT_SHIFT),
  241. PIXEL_BGRA2BGR = PIXEL_BGRA | (PIXEL_BGR << PIXEL_CONVERT_SHIFT),
  242. PIXEL_BGRA2GRAY = PIXEL_BGRA | (PIXEL_GRAY << PIXEL_CONVERT_SHIFT),
  243. PIXEL_BGRA2RGBA = PIXEL_BGRA | (PIXEL_RGBA << PIXEL_CONVERT_SHIFT),
  244. };
  245. // convenient construct from pixel data
  246. static Mat from_pixels(const unsigned char* pixels, int type, int w, int h, Allocator* allocator = 0);
  247. // convenient construct from pixel data with stride(bytes-per-row) parameter
  248. static Mat from_pixels(const unsigned char* pixels, int type, int w, int h, int stride, Allocator* allocator = 0);
  249. // convenient construct from pixel data and resize to specific size
  250. static Mat from_pixels_resize(const unsigned char* pixels, int type, int w, int h, int target_width, int target_height, Allocator* allocator = 0);
  251. // convenient construct from pixel data and resize to specific size with stride(bytes-per-row) parameter
  252. static Mat from_pixels_resize(const unsigned char* pixels, int type, int w, int h, int stride, int target_width, int target_height, Allocator* allocator = 0);
  253. // convenient construct from pixel data roi
  254. static Mat from_pixels_roi(const unsigned char* pixels, int type, int w, int h, int roix, int roiy, int roiw, int roih, Allocator* allocator = 0);
  255. // convenient construct from pixel data roi with stride(bytes-per-row) parameter
  256. static Mat from_pixels_roi(const unsigned char* pixels, int type, int w, int h, int stride, int roix, int roiy, int roiw, int roih, Allocator* allocator = 0);
  257. // convenient construct from pixel data roi and resize to specific size
  258. static Mat from_pixels_roi_resize(const unsigned char* pixels, int type, int w, int h, int roix, int roiy, int roiw, int roih, int target_width, int target_height, Allocator* allocator = 0);
  259. // convenient construct from pixel data roi and resize to specific size with stride(bytes-per-row) parameter
  260. static Mat from_pixels_roi_resize(const unsigned char* pixels, int type, int w, int h, int stride, int roix, int roiy, int roiw, int roih, int target_width, int target_height, Allocator* allocator = 0);
  261. // convenient export to pixel data
  262. void to_pixels(unsigned char* pixels, int type) const;
  263. // convenient export to pixel data with stride(bytes-per-row) parameter
  264. void to_pixels(unsigned char* pixels, int type, int stride) const;
  265. // convenient export to pixel data and resize to specific size
  266. void to_pixels_resize(unsigned char* pixels, int type, int target_width, int target_height) const;
  267. // convenient export to pixel data and resize to specific size with stride(bytes-per-row) parameter
  268. void to_pixels_resize(unsigned char* pixels, int type, int target_width, int target_height, int target_stride) const;
  269. #if NCNN_PLATFORM_API
  270. #if __ANDROID_API__ >= 9
  271. // convenient construct from android Bitmap
  272. static Mat from_android_bitmap(JNIEnv* env, jobject bitmap, int type_to, Allocator* allocator = 0);
  273. // convenient construct from android Bitmap and resize to specific size
  274. static Mat from_android_bitmap_resize(JNIEnv* env, jobject bitmap, int type_to, int target_width, int target_height, Allocator* allocator = 0);
  275. // convenient construct from android Bitmap roi
  276. static Mat from_android_bitmap_roi(JNIEnv* env, jobject bitmap, int type_to, int roix, int roiy, int roiw, int roih, Allocator* allocator = 0);
  277. // convenient construct from android Bitmap roi and resize to specific size
  278. static Mat from_android_bitmap_roi_resize(JNIEnv* env, jobject bitmap, int type_to, int roix, int roiy, int roiw, int roih, int target_width, int target_height, Allocator* allocator = 0);
  279. // convenient export to android Bitmap and resize to the android Bitmap size
  280. void to_android_bitmap(JNIEnv* env, jobject bitmap, int type_from) const;
  281. #endif // __ANDROID_API__ >= 9
  282. #endif // NCNN_PLATFORM_API
  283. #endif // NCNN_PIXEL
  284. // substract channel-wise mean values, then multiply by normalize values, pass 0 to skip
  285. void substract_mean_normalize(const float* mean_vals, const float* norm_vals);
  286. // convenient construct from half precision floating point data
  287. static Mat from_float16(const unsigned short* data, int size);
  288. // pointer to the data
  289. void* data;
  290. // pointer to the reference counter
  291. // when points to user-allocated data, the pointer is NULL
  292. int* refcount;
  293. // element size in bytes
  294. // 4 = float32/int32
  295. // 2 = float16
  296. // 1 = int8/uint8
  297. // 0 = empty
  298. size_t elemsize;
  299. // packed count inside element
  300. // c/1-d-h-w-1 c/1-h-w-1 h/1-w-1 w/1-1 scalar
  301. // c/4-d-h-w-4 c/4-h-w-4 h/4-w-4 w/4-4 sse/neon
  302. // c/8-d-h-w-8 c/8-h-w-8 h/8-w-8 w/8-8 avx/fp16
  303. int elempack;
  304. // the allocator
  305. Allocator* allocator;
  306. // the dimension rank
  307. int dims;
  308. int w;
  309. int h;
  310. int d;
  311. int c;
  312. size_t cstep;
  313. };
  314. #if NCNN_VULKAN
  315. // the three dimension matrix, vulkan version
  316. class NCNN_EXPORT VkMat
  317. {
  318. public:
  319. // empty
  320. VkMat();
  321. // vec
  322. VkMat(int w, size_t elemsize, VkAllocator* allocator);
  323. // image
  324. VkMat(int w, int h, size_t elemsize, VkAllocator* allocator);
  325. // dim
  326. VkMat(int w, int h, int c, size_t elemsize, VkAllocator* allocator);
  327. // cube
  328. VkMat(int w, int h, int d, int c, size_t elemsize, VkAllocator* allocator);
  329. // packed vec
  330. VkMat(int w, size_t elemsize, int elempack, VkAllocator* allocator);
  331. // packed image
  332. VkMat(int w, int h, size_t elemsize, int elempack, VkAllocator* allocator);
  333. // packed dim
  334. VkMat(int w, int h, int c, size_t elemsize, int elempack, VkAllocator* allocator);
  335. // packed cube
  336. VkMat(int w, int h, int d, int c, size_t elemsize, int elempack, VkAllocator* allocator);
  337. // copy
  338. VkMat(const VkMat& m);
  339. // external vec
  340. VkMat(int w, VkBufferMemory* data, size_t elemsize, VkAllocator* allocator);
  341. // external image
  342. VkMat(int w, int h, VkBufferMemory* data, size_t elemsize, VkAllocator* allocator);
  343. // external dim
  344. VkMat(int w, int h, int c, VkBufferMemory* data, size_t elemsize, VkAllocator* allocator);
  345. // external cube
  346. VkMat(int w, int h, int d, int c, VkBufferMemory* data, size_t elemsize, VkAllocator* allocator);
  347. // external packed vec
  348. VkMat(int w, VkBufferMemory* data, size_t elemsize, int elempack, VkAllocator* allocator);
  349. // external packed image
  350. VkMat(int w, int h, VkBufferMemory* data, size_t elemsize, int elempack, VkAllocator* allocator);
  351. // external packed dim
  352. VkMat(int w, int h, int c, VkBufferMemory* data, size_t elemsize, int elempack, VkAllocator* allocator);
  353. // external packed cube
  354. VkMat(int w, int h, int d, int c, VkBufferMemory* data, size_t elemsize, int elempack, VkAllocator* allocator);
  355. // release
  356. ~VkMat();
  357. // assign
  358. VkMat& operator=(const VkMat& m);
  359. // allocate vec
  360. void create(int w, size_t elemsize, VkAllocator* allocator);
  361. // allocate image
  362. void create(int w, int h, size_t elemsize, VkAllocator* allocator);
  363. // allocate dim
  364. void create(int w, int h, int c, size_t elemsize, VkAllocator* allocator);
  365. // allocate cube
  366. void create(int w, int h, int d, int c, size_t elemsize, VkAllocator* allocator);
  367. // allocate packed vec
  368. void create(int w, size_t elemsize, int elempack, VkAllocator* allocator);
  369. // allocate packed image
  370. void create(int w, int h, size_t elemsize, int elempack, VkAllocator* allocator);
  371. // allocate packed dim
  372. void create(int w, int h, int c, size_t elemsize, int elempack, VkAllocator* allocator);
  373. // allocate packed cube
  374. void create(int w, int h, int d, int c, size_t elemsize, int elempack, VkAllocator* allocator);
  375. // allocate like
  376. void create_like(const Mat& m, VkAllocator* allocator);
  377. // allocate like
  378. void create_like(const VkMat& m, VkAllocator* allocator);
  379. // allocate like
  380. void create_like(const VkImageMat& im, VkAllocator* allocator);
  381. // mapped
  382. Mat mapped() const;
  383. void* mapped_ptr() const;
  384. // refcount++
  385. void addref();
  386. // refcount--
  387. void release();
  388. bool empty() const;
  389. size_t total() const;
  390. // bits per element
  391. int elembits() const;
  392. // shape only
  393. Mat shape() const;
  394. // low-level reference
  395. VkBuffer buffer() const;
  396. size_t buffer_offset() const;
  397. size_t buffer_capacity() const;
  398. // device buffer
  399. VkBufferMemory* data;
  400. // pointer to the reference counter
  401. // when points to user-allocated data, the pointer is NULL
  402. int* refcount;
  403. // element size in bytes
  404. // 4 = float32/int32
  405. // 2 = float16
  406. // 1 = int8/uint8
  407. // 0 = empty
  408. size_t elemsize;
  409. // packed count inside element
  410. // c/1-d-h-w-1 c/1-h-w-1 h/1-w-1 w/1-1 scalar
  411. // c/4-d-h-w-4 c/4-h-w-4 h/4-w-4 w/4-4 sse/neon
  412. // c/8-d-h-w-8 c/8-h-w-8 h/8-w-8 w/8-8 avx/fp16
  413. int elempack;
  414. // the allocator
  415. VkAllocator* allocator;
  416. // the dimension rank
  417. int dims;
  418. int w;
  419. int h;
  420. int d;
  421. int c;
  422. size_t cstep;
  423. };
  424. class NCNN_EXPORT VkImageMat
  425. {
  426. public:
  427. // empty
  428. VkImageMat();
  429. // vec
  430. VkImageMat(int w, size_t elemsize, VkAllocator* allocator);
  431. // image
  432. VkImageMat(int w, int h, size_t elemsize, VkAllocator* allocator);
  433. // dim
  434. VkImageMat(int w, int h, int c, size_t elemsize, VkAllocator* allocator);
  435. // cube
  436. VkImageMat(int w, int h, int d, int c, size_t elemsize, VkAllocator* allocator);
  437. // packed vec
  438. VkImageMat(int w, size_t elemsize, int elempack, VkAllocator* allocator);
  439. // packed image
  440. VkImageMat(int w, int h, size_t elemsize, int elempack, VkAllocator* allocator);
  441. // packed dim
  442. VkImageMat(int w, int h, int c, size_t elemsize, int elempack, VkAllocator* allocator);
  443. // packed cube
  444. VkImageMat(int w, int h, int d, int c, size_t elemsize, int elempack, VkAllocator* allocator);
  445. // copy
  446. VkImageMat(const VkImageMat& m);
  447. // external vec
  448. VkImageMat(int w, VkImageMemory* data, size_t elemsize, VkAllocator* allocator);
  449. // external image
  450. VkImageMat(int w, int h, VkImageMemory* data, size_t elemsize, VkAllocator* allocator);
  451. // external dim
  452. VkImageMat(int w, int h, int c, VkImageMemory* data, size_t elemsize, VkAllocator* allocator);
  453. // external cube
  454. VkImageMat(int w, int h, int d, int c, VkImageMemory* data, size_t elemsize, VkAllocator* allocator);
  455. // external packed vec
  456. VkImageMat(int w, VkImageMemory* data, size_t elemsize, int elempack, VkAllocator* allocator);
  457. // external packed image
  458. VkImageMat(int w, int h, VkImageMemory* data, size_t elemsize, int elempack, VkAllocator* allocator);
  459. // external packed dim
  460. VkImageMat(int w, int h, int c, VkImageMemory* data, size_t elemsize, int elempack, VkAllocator* allocator);
  461. // external packed cube
  462. VkImageMat(int w, int h, int d, int c, VkImageMemory* data, size_t elemsize, int elempack, VkAllocator* allocator);
  463. // release
  464. ~VkImageMat();
  465. // assign
  466. VkImageMat& operator=(const VkImageMat& m);
  467. // allocate vec
  468. void create(int w, size_t elemsize, VkAllocator* allocator);
  469. // allocate image
  470. void create(int w, int h, size_t elemsize, VkAllocator* allocator);
  471. // allocate dim
  472. void create(int w, int h, int c, size_t elemsize, VkAllocator* allocator);
  473. // allocate cube
  474. void create(int w, int h, int d, int c, size_t elemsize, VkAllocator* allocator);
  475. // allocate packed vec
  476. void create(int w, size_t elemsize, int elempack, VkAllocator* allocator);
  477. // allocate packed image
  478. void create(int w, int h, size_t elemsize, int elempack, VkAllocator* allocator);
  479. // allocate packed dim
  480. void create(int w, int h, int c, size_t elemsize, int elempack, VkAllocator* allocator);
  481. // allocate packed cube
  482. void create(int w, int h, int d, int c, size_t elemsize, int elempack, VkAllocator* allocator);
  483. // allocate like
  484. void create_like(const Mat& m, VkAllocator* allocator);
  485. // allocate like
  486. void create_like(const VkMat& m, VkAllocator* allocator);
  487. // allocate like
  488. void create_like(const VkImageMat& im, VkAllocator* allocator);
  489. // mapped
  490. Mat mapped() const;
  491. void* mapped_ptr() const;
  492. // refcount++
  493. void addref();
  494. // refcount--
  495. void release();
  496. bool empty() const;
  497. size_t total() const;
  498. // bits per element
  499. int elembits() const;
  500. // shape only
  501. Mat shape() const;
  502. // low-level reference
  503. VkImage image() const;
  504. VkImageView imageview() const;
  505. #if NCNN_PLATFORM_API
  506. #if __ANDROID_API__ >= 26
  507. // convenient construct from android hardware buffer
  508. static VkImageMat from_android_hardware_buffer(VkAndroidHardwareBufferImageAllocator* allocator);
  509. #endif // __ANDROID_API__ >= 26
  510. #endif // NCNN_PLATFORM_API
  511. // device image
  512. VkImageMemory* data;
  513. // pointer to the reference counter
  514. // when points to user-allocated data, the pointer is NULL
  515. int* refcount;
  516. // element size in bytes
  517. // 4 = float32/int32
  518. // 2 = float16
  519. // 1 = int8/uint8
  520. // 0 = empty
  521. size_t elemsize;
  522. // packed count inside element
  523. // c/1-d-h-w-1 c/1-h-w-1 h/1-w-1 w/1-1 scalar
  524. // c/4-d-h-w-4 c/4-h-w-4 h/4-w-4 w/4-4 sse/neon
  525. // c/8-d-h-w-8 c/8-h-w-8 h/8-w-8 w/8-8 avx/fp16
  526. int elempack;
  527. // the allocator
  528. VkAllocator* allocator;
  529. // the dimension rank
  530. int dims;
  531. int w;
  532. int h;
  533. int d;
  534. int c;
  535. };
  536. // type for vulkan specialization constant and push constant
  537. union vk_specialization_type
  538. {
  539. int i;
  540. float f;
  541. uint32_t u32;
  542. };
  543. union vk_constant_type
  544. {
  545. int i;
  546. float f;
  547. };
  548. #endif // NCNN_VULKAN
  549. // misc function
  550. #if NCNN_PIXEL
  551. // convert yuv420sp(nv21) to rgb, the fast approximate version
  552. NCNN_EXPORT void yuv420sp2rgb(const unsigned char* yuv420sp, int w, int h, unsigned char* rgb);
  553. // convert yuv420sp(nv12) to rgb, the fast approximate version
  554. NCNN_EXPORT void yuv420sp2rgb_nv12(const unsigned char* yuv420sp, int w, int h, unsigned char* rgb);
  555. // convert yuv420sp(nv21) to rgb with half resize, the faster approximate version
  556. NCNN_EXPORT void yuv420sp2rgb_half(const unsigned char* yuv420sp, int w, int h, unsigned char* rgb);
  557. // image pixel bilinear resize
  558. NCNN_EXPORT void resize_bilinear_c1(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h);
  559. NCNN_EXPORT void resize_bilinear_c2(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h);
  560. NCNN_EXPORT void resize_bilinear_c3(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h);
  561. NCNN_EXPORT void resize_bilinear_c4(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h);
  562. // image pixel bilinear resize with stride(bytes-per-row) parameter
  563. NCNN_EXPORT void resize_bilinear_c1(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride);
  564. NCNN_EXPORT void resize_bilinear_c2(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride);
  565. NCNN_EXPORT void resize_bilinear_c3(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride);
  566. NCNN_EXPORT void resize_bilinear_c4(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride);
  567. // image pixel bilinear resize, convenient wrapper for yuv420sp(nv21/nv12)
  568. NCNN_EXPORT void resize_bilinear_yuv420sp(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h);
  569. #endif // NCNN_PIXEL
  570. #if NCNN_PIXEL_ROTATE
  571. // type is the from type, 6 means rotating from 6 to 1
  572. //
  573. // 1 2 3 4 5 6 7 8
  574. //
  575. // 888888 888888 88 88 8888888888 88 88 8888888888
  576. // 88 88 88 88 88 88 88 88 88 88 88 88
  577. // 8888 8888 8888 8888 88 8888888888 8888888888 88
  578. // 88 88 88 88
  579. // 88 88 888888 888888
  580. //
  581. // ref http://sylvana.net/jpegcrop/exif_orientation.html
  582. // image pixel kanna rotate
  583. NCNN_EXPORT void kanna_rotate_c1(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, int type);
  584. NCNN_EXPORT void kanna_rotate_c2(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, int type);
  585. NCNN_EXPORT void kanna_rotate_c3(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, int type);
  586. NCNN_EXPORT void kanna_rotate_c4(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, int type);
  587. // image pixel kanna rotate with stride(bytes-per-row) parameter
  588. NCNN_EXPORT void kanna_rotate_c1(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, int type);
  589. NCNN_EXPORT void kanna_rotate_c2(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, int type);
  590. NCNN_EXPORT void kanna_rotate_c3(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, int type);
  591. NCNN_EXPORT void kanna_rotate_c4(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, int type);
  592. // image pixel kanna rotate, convenient wrapper for yuv420sp(nv21/nv12)
  593. NCNN_EXPORT void kanna_rotate_yuv420sp(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, int type);
  594. #endif // NCNN_PIXEL_ROTATE
  595. #if NCNN_PIXEL_AFFINE
  596. // resolve affine transform matrix from rotation angle, scale factor and x y offset
  597. NCNN_EXPORT void get_rotation_matrix(float angle, float scale, float dx, float dy, float* tm);
  598. // resolve affine transform matrix from two set of points, num_point must be >= 2
  599. NCNN_EXPORT void get_affine_transform(const float* points_from, const float* points_to, int num_point, float* tm);
  600. // resolve the inversion affine transform matrix
  601. NCNN_EXPORT void invert_affine_transform(const float* tm, float* tm_inv);
  602. // image pixel bilinear warpaffine inverse transform, set -233 for transparent border color, the color RGBA is little-endian encoded
  603. NCNN_EXPORT void warpaffine_bilinear_c1(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, const float* tm, int type = 0, unsigned int v = 0);
  604. NCNN_EXPORT void warpaffine_bilinear_c2(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, const float* tm, int type = 0, unsigned int v = 0);
  605. NCNN_EXPORT void warpaffine_bilinear_c3(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, const float* tm, int type = 0, unsigned int v = 0);
  606. NCNN_EXPORT void warpaffine_bilinear_c4(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, const float* tm, int type = 0, unsigned int v = 0);
  607. // image pixel bilinear warpaffine inverse transform with stride(bytes-per-row) parameter, set -233 for transparent border color, the color RGBA is little-endian encoded
  608. NCNN_EXPORT void warpaffine_bilinear_c1(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, const float* tm, int type = 0, unsigned int v = 0);
  609. NCNN_EXPORT void warpaffine_bilinear_c2(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, const float* tm, int type = 0, unsigned int v = 0);
  610. NCNN_EXPORT void warpaffine_bilinear_c3(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, const float* tm, int type = 0, unsigned int v = 0);
  611. NCNN_EXPORT void warpaffine_bilinear_c4(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, const float* tm, int type = 0, unsigned int v = 0);
  612. // image pixel bilinear warpaffine, convenient wrapper for yuv420sp(nv21/nv12), set -233 for transparent border color, the color YUV_ is little-endian encoded
  613. NCNN_EXPORT void warpaffine_bilinear_yuv420sp(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, const float* tm, int type = 0, unsigned int v = 0);
  614. #endif // NCNN_PIXEL_AFFINE
  615. #if NCNN_PIXEL_DRAWING
  616. // draw rectangle, set thickness -1 for filled rectangle, the color RGBA is little-endian encoded
  617. NCNN_EXPORT void draw_rectangle_c1(unsigned char* pixels, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness);
  618. NCNN_EXPORT void draw_rectangle_c2(unsigned char* pixels, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness);
  619. NCNN_EXPORT void draw_rectangle_c3(unsigned char* pixels, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness);
  620. NCNN_EXPORT void draw_rectangle_c4(unsigned char* pixels, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness);
  621. // draw rectangle with stride(bytes-per-row) parameter, set thickness -1 for filled rectangle, the color RGBA is little-endian encoded
  622. NCNN_EXPORT void draw_rectangle_c1(unsigned char* pixels, int w, int h, int stride, int rx, int ry, int rw, int rh, unsigned int color, int thickness);
  623. NCNN_EXPORT void draw_rectangle_c2(unsigned char* pixels, int w, int h, int stride, int rx, int ry, int rw, int rh, unsigned int color, int thickness);
  624. NCNN_EXPORT void draw_rectangle_c3(unsigned char* pixels, int w, int h, int stride, int rx, int ry, int rw, int rh, unsigned int color, int thickness);
  625. NCNN_EXPORT void draw_rectangle_c4(unsigned char* pixels, int w, int h, int stride, int rx, int ry, int rw, int rh, unsigned int color, int thickness);
  626. // draw rectangle, convenient wrapper for yuv420sp(nv21/nv12), set thickness -1 for filled rectangle, the color YUV_ is little-endian encoded
  627. NCNN_EXPORT void draw_rectangle_yuv420sp(unsigned char* yuv420sp, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness);
  628. // draw circle, set thickness -1 for filled circle, the color RGBA is little-endian encoded
  629. NCNN_EXPORT void draw_circle_c1(unsigned char* pixels, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness);
  630. NCNN_EXPORT void draw_circle_c2(unsigned char* pixels, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness);
  631. NCNN_EXPORT void draw_circle_c3(unsigned char* pixels, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness);
  632. NCNN_EXPORT void draw_circle_c4(unsigned char* pixels, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness);
  633. // draw circle with stride(bytes-per-row) parameter, set thickness -1 for filled circle, the color RGBA is little-endian encoded
  634. NCNN_EXPORT void draw_circle_c1(unsigned char* pixels, int w, int h, int stride, int cx, int cy, int radius, unsigned int color, int thickness);
  635. NCNN_EXPORT void draw_circle_c2(unsigned char* pixels, int w, int h, int stride, int cx, int cy, int radius, unsigned int color, int thickness);
  636. NCNN_EXPORT void draw_circle_c3(unsigned char* pixels, int w, int h, int stride, int cx, int cy, int radius, unsigned int color, int thickness);
  637. NCNN_EXPORT void draw_circle_c4(unsigned char* pixels, int w, int h, int stride, int cx, int cy, int radius, unsigned int color, int thickness);
  638. // draw circle, convenient wrapper for yuv420sp(nv21/nv12), set thickness -1 for filled circle, the color YUV_ is little-endian encoded
  639. NCNN_EXPORT void draw_circle_yuv420sp(unsigned char* yuv420sp, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness);
  640. // draw line, the color RGBA is little-endian encoded
  641. NCNN_EXPORT void draw_line_c1(unsigned char* pixels, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness);
  642. NCNN_EXPORT void draw_line_c2(unsigned char* pixels, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness);
  643. NCNN_EXPORT void draw_line_c3(unsigned char* pixels, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness);
  644. NCNN_EXPORT void draw_line_c4(unsigned char* pixels, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness);
  645. // draw line with stride(bytes-per-row) parameter, the color RGBA is little-endian encoded
  646. NCNN_EXPORT void draw_line_c1(unsigned char* pixels, int w, int h, int stride, int x0, int y0, int x1, int y1, unsigned int color, int thickness);
  647. NCNN_EXPORT void draw_line_c2(unsigned char* pixels, int w, int h, int stride, int x0, int y0, int x1, int y1, unsigned int color, int thickness);
  648. NCNN_EXPORT void draw_line_c3(unsigned char* pixels, int w, int h, int stride, int x0, int y0, int x1, int y1, unsigned int color, int thickness);
  649. NCNN_EXPORT void draw_line_c4(unsigned char* pixels, int w, int h, int stride, int x0, int y0, int x1, int y1, unsigned int color, int thickness);
  650. // draw line, convenient wrapper for yuv420sp(nv21/nv12), the color YUV_ is little-endian encoded
  651. NCNN_EXPORT void draw_line_yuv420sp(unsigned char* yuv420sp, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness);
  652. // resolve text bounding box size
  653. NCNN_EXPORT void get_text_drawing_size(const char* text, int fontpixelsize, int* w, int* h);
  654. // draw ascii printables and newline, the color RGBA is little-endian encoded
  655. NCNN_EXPORT void draw_text_c1(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color);
  656. NCNN_EXPORT void draw_text_c2(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color);
  657. NCNN_EXPORT void draw_text_c3(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color);
  658. NCNN_EXPORT void draw_text_c4(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color);
  659. // draw ascii printables and newline with stride(bytes-per-row) parameter, the color RGBA is little-endian encoded
  660. NCNN_EXPORT void draw_text_c1(unsigned char* pixels, int w, int h, int stride, const char* text, int x, int y, int fontpixelsize, unsigned int color);
  661. NCNN_EXPORT void draw_text_c2(unsigned char* pixels, int w, int h, int stride, const char* text, int x, int y, int fontpixelsize, unsigned int color);
  662. NCNN_EXPORT void draw_text_c3(unsigned char* pixels, int w, int h, int stride, const char* text, int x, int y, int fontpixelsize, unsigned int color);
  663. NCNN_EXPORT void draw_text_c4(unsigned char* pixels, int w, int h, int stride, const char* text, int x, int y, int fontpixelsize, unsigned int color);
  664. // draw ascii printables and newline, convenient wrapper for yuv420sp(nv21/nv12), the color YUV_ is little-endian encoded
  665. NCNN_EXPORT void draw_text_yuv420sp(unsigned char* yuv420sp, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color);
  666. #endif // NCNN_PIXEL_DRAWING
  667. // type conversion
  668. // convert float to half precision floating point
  669. NCNN_EXPORT unsigned short float32_to_float16(float value);
  670. // convert half precision floating point to float
  671. NCNN_EXPORT float float16_to_float32(unsigned short value);
  672. // convert float to brain half
  673. NCNN_EXPORT NCNN_FORCEINLINE unsigned short float32_to_bfloat16(float value)
  674. {
  675. // 16 : 16
  676. union
  677. {
  678. unsigned int u;
  679. float f;
  680. } tmp;
  681. tmp.f = value;
  682. return tmp.u >> 16;
  683. }
  684. // convert brain half to float
  685. NCNN_EXPORT NCNN_FORCEINLINE float bfloat16_to_float32(unsigned short value)
  686. {
  687. // 16 : 16
  688. union
  689. {
  690. unsigned int u;
  691. float f;
  692. } tmp;
  693. tmp.u = value << 16;
  694. return tmp.f;
  695. }
  696. // mat process
// border padding mode for copy_make_border / copy_make_border_3d
enum BorderType
{
    BORDER_CONSTANT = 0,       // fill the border with the constant value v
    BORDER_REPLICATE = 1,      // extend by replicating the edge elements
    BORDER_REFLECT = 2,        // extend by reflecting the elements at the edge
    BORDER_TRANSPARENT = -233, // leave the destination border content untouched
};
  704. NCNN_EXPORT void copy_make_border(const Mat& src, Mat& dst, int top, int bottom, int left, int right, int type, float v, const Option& opt = Option());
  705. NCNN_EXPORT void copy_make_border_3d(const Mat& src, Mat& dst, int top, int bottom, int left, int right, int front, int behind, int type, float v, const Option& opt = Option());
  706. NCNN_EXPORT void copy_cut_border(const Mat& src, Mat& dst, int top, int bottom, int left, int right, const Option& opt = Option());
  707. NCNN_EXPORT void copy_cut_border_3d(const Mat& src, Mat& dst, int top, int bottom, int left, int right, int front, int behind, const Option& opt = Option());
  708. NCNN_EXPORT void resize_nearest(const Mat& src, Mat& dst, int w, int h, const Option& opt = Option());
  709. NCNN_EXPORT void resize_bilinear(const Mat& src, Mat& dst, int w, int h, const Option& opt = Option());
  710. NCNN_EXPORT void resize_bicubic(const Mat& src, Mat& dst, int w, int h, const Option& opt = Option());
  711. NCNN_EXPORT void convert_packing(const Mat& src, Mat& dst, int elempack, const Option& opt = Option());
  712. NCNN_EXPORT void flatten(const Mat& src, Mat& dst, const Option& opt = Option());
  713. NCNN_EXPORT void cast_float32_to_float16(const Mat& src, Mat& dst, const Option& opt = Option());
  714. NCNN_EXPORT void cast_float16_to_float32(const Mat& src, Mat& dst, const Option& opt = Option());
  715. NCNN_EXPORT void cast_int8_to_float32(const Mat& src, Mat& dst, const Option& opt = Option());
  716. NCNN_EXPORT void cast_float32_to_bfloat16(const Mat& src, Mat& dst, const Option& opt = Option());
  717. NCNN_EXPORT void cast_bfloat16_to_float32(const Mat& src, Mat& dst, const Option& opt = Option());
  718. NCNN_EXPORT void quantize_to_int8(const Mat& src, Mat& dst, const Mat& scale_data, const Option& opt = Option());
  719. NCNN_EXPORT void dequantize_from_int32(const Mat& src, Mat& dst, const Mat& scale_data, const Mat& bias_data, const Option& opt = Option());
  720. NCNN_EXPORT void requantize_from_int32_to_int8(const Mat& src, Mat& dst, const Mat& scale_in_data, const Mat& scale_out_data, const Mat& bias_data, int activation_type, const Mat& activation_params, const Option& opt = Option());
// default constructor: a null mat, no memory is allocated
NCNN_FORCEINLINE Mat::Mat()
    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
{
}
// 1-dim allocating constructor; all work is delegated to create()
NCNN_FORCEINLINE Mat::Mat(int _w, size_t _elemsize, Allocator* _allocator)
    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
{
    create(_w, _elemsize, _allocator);
}
// 2-dim allocating constructor
NCNN_FORCEINLINE Mat::Mat(int _w, int _h, size_t _elemsize, Allocator* _allocator)
    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
{
    create(_w, _h, _elemsize, _allocator);
}
// 3-dim allocating constructor
NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _c, size_t _elemsize, Allocator* _allocator)
    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
{
    create(_w, _h, _c, _elemsize, _allocator);
}
// 4-dim allocating constructor
NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _d, int _c, size_t _elemsize, Allocator* _allocator)
    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
{
    create(_w, _h, _d, _c, _elemsize, _allocator);
}
// packed variants: _elempack scalars are grouped into one element of _elemsize bytes
// 1-dim packed allocating constructor
NCNN_FORCEINLINE Mat::Mat(int _w, size_t _elemsize, int _elempack, Allocator* _allocator)
    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
{
    create(_w, _elemsize, _elempack, _allocator);
}
// 2-dim packed allocating constructor
NCNN_FORCEINLINE Mat::Mat(int _w, int _h, size_t _elemsize, int _elempack, Allocator* _allocator)
    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
{
    create(_w, _h, _elemsize, _elempack, _allocator);
}
// 3-dim packed allocating constructor
NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _c, size_t _elemsize, int _elempack, Allocator* _allocator)
    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
{
    create(_w, _h, _c, _elemsize, _elempack, _allocator);
}
// 4-dim packed allocating constructor
NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _d, int _c, size_t _elemsize, int _elempack, Allocator* _allocator)
    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
{
    create(_w, _h, _d, _c, _elemsize, _elempack, _allocator);
}
// copy constructor: shallow copy that shares the data buffer and
// bumps the reference count (via addref) when the source is refcounted
NCNN_FORCEINLINE Mat::Mat(const Mat& m)
    : data(m.data), refcount(m.refcount), elemsize(m.elemsize), elempack(m.elempack), allocator(m.allocator), dims(m.dims), w(m.w), h(m.h), d(m.d), c(m.c), cstep(m.cstep)
{
    addref();
}
// external-data constructors: wrap a user-provided buffer without taking
// ownership (refcount stays 0, so release() will never free _data)
// 1-dim view over external data
NCNN_FORCEINLINE Mat::Mat(int _w, void* _data, size_t _elemsize, Allocator* _allocator)
    : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(1), w(_w), h(1), d(1), c(1)
{
    cstep = w;
}
// 2-dim view over external data
NCNN_FORCEINLINE Mat::Mat(int _w, int _h, void* _data, size_t _elemsize, Allocator* _allocator)
    : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(2), w(_w), h(_h), d(1), c(1)
{
    cstep = (size_t)w * h;
}
// 3-dim view over external data; channel stride is padded to a 16-byte boundary
NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _c, void* _data, size_t _elemsize, Allocator* _allocator)
    : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(3), w(_w), h(_h), d(1), c(_c)
{
    cstep = alignSize((size_t)w * h * elemsize, 16) / elemsize;
}
// 4-dim view over external data; channel stride is padded to a 16-byte boundary
NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _d, int _c, void* _data, size_t _elemsize, Allocator* _allocator)
    : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(4), w(_w), h(_h), d(_d), c(_c)
{
    cstep = alignSize((size_t)w * h * d * elemsize, 16) / elemsize;
}
// packed 1-dim view over external data
NCNN_FORCEINLINE Mat::Mat(int _w, void* _data, size_t _elemsize, int _elempack, Allocator* _allocator)
    : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(1), w(_w), h(1), d(1), c(1)
{
    cstep = w;
}
// packed 2-dim view over external data
NCNN_FORCEINLINE Mat::Mat(int _w, int _h, void* _data, size_t _elemsize, int _elempack, Allocator* _allocator)
    : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(2), w(_w), h(_h), d(1), c(1)
{
    cstep = (size_t)w * h;
}
// packed 3-dim view over external data
NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _c, void* _data, size_t _elemsize, int _elempack, Allocator* _allocator)
    : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(3), w(_w), h(_h), d(1), c(_c)
{
    cstep = alignSize((size_t)w * h * elemsize, 16) / elemsize;
}
// packed 4-dim view over external data
NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _d, int _c, void* _data, size_t _elemsize, int _elempack, Allocator* _allocator)
    : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(4), w(_w), h(_h), d(_d), c(_c)
{
    cstep = alignSize((size_t)w * h * d * elemsize, 16) / elemsize;
}
// destructor: drop this reference; the buffer is freed only when the
// last refcounted owner goes away (see release())
NCNN_FORCEINLINE Mat::~Mat()
{
    release();
}
// fill every float element with _v
// the NEON path stores 4 lanes at a time; the scalar tail loop below the
// #endif also serves as the whole loop on non-NEON builds
NCNN_FORCEINLINE void Mat::fill(float _v)
{
    int size = (int)total();
    float* ptr = (float*)data;
    int i = 0;
#if __ARM_NEON
    float32x4_t _c = vdupq_n_f32(_v);
    for (; i + 3 < size; i += 4)
    {
        vst1q_f32(ptr, _c);
        ptr += 4;
    }
#endif // __ARM_NEON
    for (; i < size; i++)
    {
        *ptr++ = _v;
    }
}
// fill every int element with _v (same vector + scalar-tail structure)
NCNN_FORCEINLINE void Mat::fill(int _v)
{
    int size = (int)total();
    int* ptr = (int*)data;
    int i = 0;
#if __ARM_NEON
    int32x4_t _c = vdupq_n_s32(_v);
    for (; i + 3 < size; i += 4)
    {
        vst1q_s32(ptr, _c);
        ptr += 4;
    }
#endif // __ARM_NEON
    for (; i < size; i++)
    {
        *ptr++ = _v;
    }
}
#if __ARM_NEON
// vector fills: one whole vector is stored per element counted by total()
// NOTE(review): these presumably assume the mat was created with the matching
// elempack (4 or 8), since ptr advances by the full vector width each step —
// confirm at call sites
NCNN_FORCEINLINE void Mat::fill(float32x4_t _v)
{
    int size = (int)total();
    float* ptr = (float*)data;
    for (int i = 0; i < size; i++)
    {
        vst1q_f32(ptr, _v);
        ptr += 4;
    }
}
// fill with a 4-lane u16 vector (raw 16-bit storage, e.g. fp16/bf16 bits)
NCNN_FORCEINLINE void Mat::fill(uint16x4_t _v)
{
    int size = (int)total();
    unsigned short* ptr = (unsigned short*)data;
    for (int i = 0; i < size; i++)
    {
        vst1_u16(ptr, _v);
        ptr += 4;
    }
}
// fill with a 4-lane s32 vector
NCNN_FORCEINLINE void Mat::fill(int32x4_t _v)
{
    int size = (int)total();
    int* ptr = (int*)data;
    for (int i = 0; i < size; i++)
    {
        vst1q_s32(ptr, _v);
        ptr += 4;
    }
}
// fill with a pair of s32 vectors, 8 ints per element
NCNN_FORCEINLINE void Mat::fill(int32x4_t _v0, int32x4_t _v1)
{
    int size = (int)total();
    int* ptr = (int*)data;
    for (int i = 0; i < size; i++)
    {
        vst1q_s32(ptr, _v0);
        vst1q_s32(ptr + 4, _v1);
        ptr += 8;
    }
}
#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
// fill with a 4-lane fp16 vector
NCNN_FORCEINLINE void Mat::fill(float16x4_t _v)
{
    int size = (int)total();
    __fp16* ptr = (__fp16*)data;
    for (int i = 0; i < size; i++)
    {
        vst1_f16(ptr, _v);
        ptr += 4;
    }
}
// fill with an 8-lane fp16 vector
NCNN_FORCEINLINE void Mat::fill(float16x8_t _v)
{
    int size = (int)total();
    __fp16* ptr = (__fp16*)data;
    for (int i = 0; i < size; i++)
    {
        vst1q_f16(ptr, _v);
        ptr += 8;
    }
}
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
#endif // __ARM_NEON
#if __SSE2__
#if __AVX__
#if __AVX512F__
// fill with a 16-float AVX-512 vector per element
NCNN_FORCEINLINE void Mat::fill(__m512 _v)
{
    int size = (int)total();
    float* ptr = (float*)data;
    for (int i = 0; i < size; i++)
    {
        _mm512_storeu_ps(ptr, _v);
        ptr += 16;
    }
}
#endif // __AVX512F__
NCNN_FORCEINLINE void Mat::fill(__m256 _v, int _i)
{
    // old gcc cannot overload __m128 and __m256 type
    // add a dummy int parameter for different mangled function symbol
    (void)_i;
    int size = (int)total();
    float* ptr = (float*)data;
    for (int i = 0; i < size; i++)
    {
        _mm256_storeu_ps(ptr, _v);
        ptr += 8;
    }
}
#endif // __AVX__
// fill with a 4-float SSE vector per element
NCNN_FORCEINLINE void Mat::fill(__m128 _v)
{
    int size = (int)total();
    float* ptr = (float*)data;
    for (int i = 0; i < size; i++)
    {
        _mm_storeu_ps(ptr, _v);
        ptr += 4;
    }
}
// fill with a 128-bit integer vector (eight 16-bit values) per element
// NOTE(review): this uses the aligned store while the float variants use
// storeu — presumably the allocator guarantees 16-byte alignment; verify
NCNN_FORCEINLINE void Mat::fill(__m128i _v)
{
    int size = (int)total();
    unsigned short* ptr = (unsigned short*)data;
    for (int i = 0; i < size; i++)
    {
        _mm_store_si128((__m128i*)ptr, _v);
        ptr += 8;
    }
}
#endif // __SSE2__
#if __mips_msa
// fill with a 4-float MIPS MSA vector per element
NCNN_FORCEINLINE void Mat::fill(v4f32 _v)
{
    int size = (int)total();
    float* ptr = (float*)data;
    for (int i = 0; i < size; i++)
    {
        __msa_st_w((v4i32)_v, ptr, 0);
        ptr += 4;
    }
}
#endif // __mips_msa
#if __loongarch_sx
// fill with a 4-float LoongArch SX vector per element
NCNN_FORCEINLINE void Mat::fill(__m128 _v)
{
    int size = (int)total();
    float* ptr = (float*)data;
    for (int i = 0; i < size; i++)
    {
        __lsx_vst(_v, ptr, 0);
        ptr += 4;
    }
}
#endif // __loongarch_sx
#if __riscv_vector
// RVV fills: the element width (packn) is derived from the runtime vector
// register length in bytes (vlenb), so one whole vector is stored per element
NCNN_FORCEINLINE void Mat::fill(vfloat32m1_t _v)
{
    const int packn = cpu_riscv_vlenb() / 4; // 32-bit lanes per vector
    const size_t vl = vsetvl_e32m1(packn);
    int size = (int)total();
    float* ptr = (float*)data;
    for (int i = 0; i < size; i++)
    {
        vse32_v_f32m1(ptr, _v, vl);
        ptr += packn;
    }
}
// fill with a u16 vector (raw 16-bit storage)
NCNN_FORCEINLINE void Mat::fill(vuint16m1_t _v)
{
    const int packn = cpu_riscv_vlenb() / 2; // 16-bit lanes per vector
    const size_t vl = vsetvl_e16m1(packn);
    int size = (int)total();
    unsigned short* ptr = (unsigned short*)data;
    for (int i = 0; i < size; i++)
    {
        vse16_v_u16m1(ptr, _v, vl);
        ptr += packn;
    }
}
// fill with an i8 vector
NCNN_FORCEINLINE void Mat::fill(vint8m1_t _v)
{
    const int packn = cpu_riscv_vlenb() / 1; // 8-bit lanes per vector
    const size_t vl = vsetvl_e8m1(packn);
    int size = (int)total();
    signed char* ptr = (signed char*)data;
    for (int i = 0; i < size; i++)
    {
        vse8_v_i8m1(ptr, _v, vl);
        ptr += packn;
    }
}
#if __riscv_zfh
// fill with an fp16 vector
NCNN_FORCEINLINE void Mat::fill(vfloat16m1_t _v)
{
    const int packn = cpu_riscv_vlenb() / 2; // 16-bit lanes per vector
    const size_t vl = vsetvl_e16m1(packn);
    int size = (int)total();
    __fp16* ptr = (__fp16*)data;
    for (int i = 0; i < size; i++)
    {
        vse16_v_f16m1(ptr, _v, vl);
        ptr += packn;
    }
}
#endif // __riscv_zfh
#endif // __riscv_vector
  1040. template<typename T>
  1041. NCNN_FORCEINLINE void Mat::fill(T _v)
  1042. {
  1043. int size = (int)total();
  1044. T* ptr = (T*)data;
  1045. for (int i = 0; i < size; i++)
  1046. {
  1047. ptr[i] = _v;
  1048. }
  1049. }
// copy assignment: shallow share of m's buffer
NCNN_FORCEINLINE Mat& Mat::operator=(const Mat& m)
{
    if (this == &m)
        return *this;
    // addref m BEFORE releasing ourselves, so the buffer survives even when
    // *this and m already share the same refcounted data
    if (m.refcount)
        NCNN_XADD(m.refcount, 1);
    release();
    data = m.data;
    refcount = m.refcount;
    elemsize = m.elemsize;
    elempack = m.elempack;
    allocator = m.allocator;
    dims = m.dims;
    w = m.w;
    h = m.h;
    d = m.d;
    c = m.c;
    cstep = m.cstep;
    return *this;
}
// atomically take one more reference; no-op for non-owning mats (refcount == 0)
NCNN_FORCEINLINE void Mat::addref()
{
    if (refcount)
        NCNN_XADD(refcount, 1);
}
// drop one reference and reset this mat to the null state
// NCNN_XADD returns the PREVIOUS count, so == 1 means we held the last
// reference and must free the buffer; non-owning mats (refcount == 0) skip it
NCNN_FORCEINLINE void Mat::release()
{
    if (refcount && NCNN_XADD(refcount, -1) == 1)
    {
        // free through the allocator that made the buffer, if any
        if (allocator)
            allocator->fastFree(data);
        else
            fastFree(data);
    }
    data = 0;
    elemsize = 0;
    elempack = 0;
    dims = 0;
    w = 0;
    h = 0;
    d = 0;
    c = 0;
    cstep = 0;
    refcount = 0;
}
  1095. NCNN_FORCEINLINE bool Mat::empty() const
  1096. {
  1097. return data == 0 || total() == 0;
  1098. }
// total number of elements spanned by the buffer, including any per-channel
// alignment padding baked into cstep
NCNN_FORCEINLINE size_t Mat::total() const
{
    return cstep * c;
}
  1103. NCNN_FORCEINLINE int Mat::elembits() const
  1104. {
  1105. return elempack ? static_cast<int>(elemsize * 8) / elempack : 0;
  1106. }
  1107. NCNN_FORCEINLINE Mat Mat::shape() const
  1108. {
  1109. if (dims == 1)
  1110. return Mat(w * elempack, (void*)0);
  1111. if (dims == 2)
  1112. return Mat(w, h * elempack, (void*)0);
  1113. if (dims == 3)
  1114. return Mat(w, h, c * elempack, (void*)0);
  1115. if (dims == 4)
  1116. return Mat(w, h, d, c * elempack, (void*)0);
  1117. return Mat();
  1118. }
// non-owning view of one channel, one dimension lower than *this;
// for a 4-dim mat the sub-view's channel stride becomes the w*h plane
NCNN_FORCEINLINE Mat Mat::channel(int _c)
{
    Mat m(w, h, d, (unsigned char*)data + cstep * _c * elemsize, elemsize, elempack, allocator);
    m.dims = dims - 1;
    if (dims == 4)
        m.cstep = (size_t)w * h;
    return m;
}
// const overload of the channel view above
NCNN_FORCEINLINE const Mat Mat::channel(int _c) const
{
    Mat m(w, h, d, (unsigned char*)data + cstep * _c * elemsize, elemsize, elempack, allocator);
    m.dims = dims - 1;
    if (dims == 4)
        m.cstep = (size_t)w * h;
    return m;
}
// non-owning 2-dim view of one depth slice (plane z of a channel view)
NCNN_FORCEINLINE Mat Mat::depth(int z)
{
    return Mat(w, h, (unsigned char*)data + (size_t)w * h * z * elemsize, elemsize, elempack, allocator);
}
// const overload of the depth view above
NCNN_FORCEINLINE const Mat Mat::depth(int z) const
{
    return Mat(w, h, (unsigned char*)data + (size_t)w * h * z * elemsize, elemsize, elempack, allocator);
}
// raw pointer to the start of row y, assuming float elements
NCNN_FORCEINLINE float* Mat::row(int y)
{
    return (float*)((unsigned char*)data + (size_t)w * y * elemsize);
}
// const overload of the float row accessor
NCNN_FORCEINLINE const float* Mat::row(int y) const
{
    return (const float*)((unsigned char*)data + (size_t)w * y * elemsize);
}
// typed row accessor: caller picks the element type T
template<typename T>
NCNN_FORCEINLINE T* Mat::row(int y)
{
    return (T*)((unsigned char*)data + (size_t)w * y * elemsize);
}
// const overload of the typed row accessor
template<typename T>
NCNN_FORCEINLINE const T* Mat::row(int y) const
{
    return (const T*)((unsigned char*)data + (size_t)w * y * elemsize);
}
// non-owning view of `channels` consecutive channels starting at _c;
// keeps the same dims as *this
NCNN_FORCEINLINE Mat Mat::channel_range(int _c, int channels)
{
    Mat m(w, h, d, channels, (unsigned char*)data + cstep * _c * elemsize, elemsize, elempack, allocator);
    m.dims = dims;
    return m;
}
// const overload of the channel-range view above
NCNN_FORCEINLINE const Mat Mat::channel_range(int _c, int channels) const
{
    Mat m(w, h, d, channels, (unsigned char*)data + cstep * _c * elemsize, elemsize, elempack, allocator);
    m.dims = dims;
    return m;
}
// non-owning 3-dim view of `depths` consecutive depth slices starting at z;
// the view's channel stride is the tightly-packed w*h plane
NCNN_FORCEINLINE Mat Mat::depth_range(int z, int depths)
{
    Mat m(w, h, depths, (unsigned char*)data + (size_t)w * h * z * elemsize, elemsize, elempack, allocator);
    m.cstep = (size_t)w * h;
    return m;
}
// const overload of the depth-range view above
NCNN_FORCEINLINE const Mat Mat::depth_range(int z, int depths) const
{
    Mat m(w, h, depths, (unsigned char*)data + (size_t)w * h * z * elemsize, elemsize, elempack, allocator);
    m.cstep = (size_t)w * h;
    return m;
}
// non-owning 2-dim view of `rows` consecutive rows starting at y
NCNN_FORCEINLINE Mat Mat::row_range(int y, int rows)
{
    return Mat(w, rows, (unsigned char*)data + (size_t)w * y * elemsize, elemsize, elempack, allocator);
}
// const overload of the row-range view above
NCNN_FORCEINLINE const Mat Mat::row_range(int y, int rows) const
{
    return Mat(w, rows, (unsigned char*)data + (size_t)w * y * elemsize, elemsize, elempack, allocator);
}
// non-owning 1-dim view of n consecutive elements starting at x
NCNN_FORCEINLINE Mat Mat::range(int x, int n)
{
    return Mat(n, (unsigned char*)data + x * elemsize, elemsize, elempack, allocator);
}
// const overload of the element-range view above
NCNN_FORCEINLINE const Mat Mat::range(int x, int n) const
{
    return Mat(n, (unsigned char*)data + x * elemsize, elemsize, elempack, allocator);
}
// implicit conversion to a typed pointer at the start of the buffer
template<typename T>
NCNN_FORCEINLINE Mat::operator T*()
{
    return (T*)data;
}
// const overload of the typed-pointer conversion
template<typename T>
NCNN_FORCEINLINE Mat::operator const T*() const
{
    return (const T*)data;
}
// flat element access, interpreting the buffer as float
// no bounds checking; i indexes across the whole allocation (including any cstep padding)
NCNN_FORCEINLINE float& Mat::operator[](size_t i)
{
return ((float*)data)[i];
}
// const overload of flat float element access
NCNN_FORCEINLINE const float& Mat::operator[](size_t i) const
{
return ((const float*)data)[i];
}
  1219. #if NCNN_VULKAN
// default constructor: empty VkMat, all fields zeroed, no allocation
NCNN_FORCEINLINE VkMat::VkMat()
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
{
}
// sized constructors: zero-initialize, then delegate to the matching create()
// overload which performs the device buffer allocation via _allocator
// 1-d
NCNN_FORCEINLINE VkMat::VkMat(int _w, size_t _elemsize, VkAllocator* _allocator)
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
{
create(_w, _elemsize, _allocator);
}
// 2-d
NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, size_t _elemsize, VkAllocator* _allocator)
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
{
create(_w, _h, _elemsize, _allocator);
}
// 3-d
NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _c, size_t _elemsize, VkAllocator* _allocator)
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
{
create(_w, _h, _c, _elemsize, _allocator);
}
// 4-d
NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _d, int _c, size_t _elemsize, VkAllocator* _allocator)
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
{
create(_w, _h, _d, _c, _elemsize, _allocator);
}
// sized constructors with an explicit element packing factor
// 1-d packed
NCNN_FORCEINLINE VkMat::VkMat(int _w, size_t _elemsize, int _elempack, VkAllocator* _allocator)
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
{
create(_w, _elemsize, _elempack, _allocator);
}
// 2-d packed
NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, size_t _elemsize, int _elempack, VkAllocator* _allocator)
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
{
create(_w, _h, _elemsize, _elempack, _allocator);
}
// 3-d packed
NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
{
create(_w, _h, _c, _elemsize, _elempack, _allocator);
}
// 4-d packed
NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _d, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
{
create(_w, _h, _d, _c, _elemsize, _elempack, _allocator);
}
// copy constructor: shallow copy sharing the same device buffer, bumping the refcount
NCNN_FORCEINLINE VkMat::VkMat(const VkMat& m)
: data(m.data), refcount(m.refcount), elemsize(m.elemsize), elempack(m.elempack), allocator(m.allocator), dims(m.dims), w(m.w), h(m.h), d(m.d), c(m.c)
{
addref();
// cstep is not in the init list, so copy it explicitly
cstep = m.cstep;
}
// external-data constructors: wrap an existing VkBufferMemory without taking
// ownership — refcount stays 0, so release() will never free _data
// 1-d wrapper; elements are densely packed
NCNN_FORCEINLINE VkMat::VkMat(int _w, VkBufferMemory* _data, size_t _elemsize, VkAllocator* _allocator)
: data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(1), w(_w), h(1), d(1), c(1)
{
cstep = w;
}
// 2-d wrapper
NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, VkBufferMemory* _data, size_t _elemsize, VkAllocator* _allocator)
: data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(2), w(_w), h(_h), d(1), c(1)
{
// NOTE(review): w * h is int arithmetic before widening to cstep — could overflow for huge mats
cstep = w * h;
}
// 3-d wrapper; each channel is padded so it starts on a 16-byte boundary
NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _c, VkBufferMemory* _data, size_t _elemsize, VkAllocator* _allocator)
: data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(3), w(_w), h(_h), d(1), c(_c)
{
cstep = alignSize(w * h * elemsize, 16) / elemsize;
}
// 4-d wrapper; channel stride covers w * h * d elements, 16-byte aligned
NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _d, int _c, VkBufferMemory* _data, size_t _elemsize, VkAllocator* _allocator)
: data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(4), w(_w), h(_h), d(_d), c(_c)
{
cstep = alignSize(w * h * d * elemsize, 16) / elemsize;
}
// external-data constructors with an explicit element packing factor
// 1-d packed wrapper
NCNN_FORCEINLINE VkMat::VkMat(int _w, VkBufferMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
: data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(1), w(_w), h(1), d(1), c(1)
{
cstep = w;
}
// 2-d packed wrapper
NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, VkBufferMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
: data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(2), w(_w), h(_h), d(1), c(1)
{
cstep = w * h;
}
// 3-d packed wrapper; per-channel 16-byte alignment
NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _c, VkBufferMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
: data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(3), w(_w), h(_h), d(1), c(_c)
{
cstep = alignSize(w * h * elemsize, 16) / elemsize;
}
// 4-d packed wrapper; per-channel 16-byte alignment
NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _d, int _c, VkBufferMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
: data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(4), w(_w), h(_h), d(_d), c(_c)
{
cstep = alignSize(w * h * d * elemsize, 16) / elemsize;
}
// destructor: drop our reference; frees the buffer only when the last owner releases
NCNN_FORCEINLINE VkMat::~VkMat()
{
release();
}
// copy assignment: shallow copy sharing m's device buffer
// addref m first, then release our current buffer — this order is safe even if
// both already share the same underlying data
NCNN_FORCEINLINE VkMat& VkMat::operator=(const VkMat& m)
{
if (this == &m)
return *this;
if (m.refcount)
NCNN_XADD(m.refcount, 1);
release();
data = m.data;
refcount = m.refcount;
elemsize = m.elemsize;
elempack = m.elempack;
allocator = m.allocator;
dims = m.dims;
w = m.w;
h = m.h;
d = m.d;
c = m.c;
cstep = m.cstep;
return *this;
}
// wrap the host-mapped view of this device buffer as a CPU Mat (no copy)
// returns an empty Mat when the allocator's memory is not host-mappable
// NOTE(review): unlike VkImageMat::mapped(), this does not also check
// data->mapped_ptr for null — confirm VkBufferMemory from a mappable
// allocator is always mapped
NCNN_FORCEINLINE Mat VkMat::mapped() const
{
if (!allocator->mappable)
return Mat();
if (dims == 1)
return Mat(w, mapped_ptr(), elemsize, elempack, 0);
if (dims == 2)
return Mat(w, h, mapped_ptr(), elemsize, elempack, 0);
if (dims == 3)
return Mat(w, h, c, mapped_ptr(), elemsize, elempack, 0);
if (dims == 4)
return Mat(w, h, d, c, mapped_ptr(), elemsize, elempack, 0);
return Mat();
}
// raw host pointer to this buffer's mapped memory, adjusted by its sub-allocation
// offset; returns 0 when the allocator's memory is not host-mappable
NCNN_FORCEINLINE void* VkMat::mapped_ptr() const
{
if (!allocator->mappable)
return 0;
return (unsigned char*)data->mapped_ptr + data->offset;
}
// atomically increment the refcount (no-op for non-owning wrappers where refcount is 0)
NCNN_FORCEINLINE void VkMat::addref()
{
if (refcount)
NCNN_XADD(refcount, 1);
}
// atomically decrement the refcount; the thread that drops it to zero frees the
// device buffer through the allocator, then all fields are reset to the empty state
NCNN_FORCEINLINE void VkMat::release()
{
// NCNN_XADD returns the pre-decrement value, so == 1 means we were the last owner
if (refcount && NCNN_XADD(refcount, -1) == 1)
{
if (allocator && data)
{
allocator->fastFree(data);
}
}
data = 0;
elemsize = 0;
elempack = 0;
dims = 0;
w = 0;
h = 0;
d = 0;
c = 0;
cstep = 0;
refcount = 0;
}
// true when there is no buffer or no elements
NCNN_FORCEINLINE bool VkMat::empty() const
{
return data == 0 || total() == 0;
}
// total element count including any per-channel cstep padding
NCNN_FORCEINLINE size_t VkMat::total() const
{
return cstep * c;
}
// bits per scalar element (elemsize covers a whole pack); 0 when elempack is unset
NCNN_FORCEINLINE int VkMat::elembits() const
{
return elempack ? static_cast<int>(elemsize) * 8 / elempack : 0;
}
// describe the logical shape as an unallocated Mat (data is null)
// elempack is folded back into the last dimension to give unpacked extents
NCNN_FORCEINLINE Mat VkMat::shape() const
{
if (dims == 1)
return Mat(w * elempack, (void*)0);
if (dims == 2)
return Mat(w, h * elempack, (void*)0);
if (dims == 3)
return Mat(w, h, c * elempack, (void*)0);
if (dims == 4)
return Mat(w, h, d, c * elempack, (void*)0);
return Mat();
}
// underlying VkBuffer handle (data must be non-null)
NCNN_FORCEINLINE VkBuffer VkMat::buffer() const
{
return data->buffer;
}
// byte offset of this mat's sub-allocation within the VkBuffer
NCNN_FORCEINLINE size_t VkMat::buffer_offset() const
{
return data->offset;
}
// capacity in bytes of the sub-allocation
NCNN_FORCEINLINE size_t VkMat::buffer_capacity() const
{
return data->capacity;
}
// default constructor: empty VkImageMat, all fields zeroed, no allocation
NCNN_FORCEINLINE VkImageMat::VkImageMat()
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0)
{
}
// sized constructors: zero-initialize, then delegate to the matching create()
// overload which performs the device image allocation via _allocator
// 1-d
NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, size_t _elemsize, VkAllocator* _allocator)
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0)
{
create(_w, _elemsize, _allocator);
}
// 2-d
NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, size_t _elemsize, VkAllocator* _allocator)
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0)
{
create(_w, _h, _elemsize, _allocator);
}
// 3-d
NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _c, size_t _elemsize, VkAllocator* _allocator)
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0)
{
create(_w, _h, _c, _elemsize, _allocator);
}
// 4-d
NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _d, int _c, size_t _elemsize, VkAllocator* _allocator)
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0)
{
create(_w, _h, _d, _c, _elemsize, _allocator);
}
// sized constructors with an explicit element packing factor
// 1-d packed
NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, size_t _elemsize, int _elempack, VkAllocator* _allocator)
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0)
{
create(_w, _elemsize, _elempack, _allocator);
}
// 2-d packed
NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, size_t _elemsize, int _elempack, VkAllocator* _allocator)
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0)
{
create(_w, _h, _elemsize, _elempack, _allocator);
}
// 3-d packed
NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0)
{
create(_w, _h, _c, _elemsize, _elempack, _allocator);
}
// 4-d packed
NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _d, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
: data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0)
{
create(_w, _h, _d, _c, _elemsize, _elempack, _allocator);
}
// copy constructor: shallow copy sharing the same device image, bumping the refcount
NCNN_FORCEINLINE VkImageMat::VkImageMat(const VkImageMat& m)
: data(m.data), refcount(m.refcount), elemsize(m.elemsize), elempack(m.elempack), allocator(m.allocator), dims(m.dims), w(m.w), h(m.h), d(m.d), c(m.c)
{
addref();
}
// external-data constructors: wrap an existing VkImageMemory without taking
// ownership — refcount stays 0, so release() will never free _data
// 1-d wrapper
NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, VkImageMemory* _data, size_t _elemsize, VkAllocator* _allocator)
: data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(1), w(_w), h(1), d(1), c(1)
{
}
// 2-d wrapper
NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, VkImageMemory* _data, size_t _elemsize, VkAllocator* _allocator)
: data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(2), w(_w), h(_h), d(1), c(1)
{
}
// 3-d wrapper
NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _c, VkImageMemory* _data, size_t _elemsize, VkAllocator* _allocator)
: data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(3), w(_w), h(_h), d(1), c(_c)
{
}
// 4-d wrapper
NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _d, int _c, VkImageMemory* _data, size_t _elemsize, VkAllocator* _allocator)
: data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(4), w(_w), h(_h), d(_d), c(_c)
{
}
// external-data constructors with an explicit element packing factor
// 1-d packed wrapper
NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, VkImageMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
: data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(1), w(_w), h(1), d(1), c(1)
{
}
// 2-d packed wrapper
NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, VkImageMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
: data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(2), w(_w), h(_h), d(1), c(1)
{
}
// 3-d packed wrapper
NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _c, VkImageMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
: data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(3), w(_w), h(_h), d(1), c(_c)
{
}
// 4-d packed wrapper
NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _d, int _c, VkImageMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
: data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(4), w(_w), h(_h), d(_d), c(_c)
{
}
// destructor: drop our reference; frees the image only when the last owner releases
NCNN_FORCEINLINE VkImageMat::~VkImageMat()
{
release();
}
// copy assignment: shallow copy sharing m's device image
// addref m first, then release our current image — this order is safe even if
// both already share the same underlying data
NCNN_FORCEINLINE VkImageMat& VkImageMat::operator=(const VkImageMat& m)
{
if (this == &m)
return *this;
if (m.refcount)
NCNN_XADD(m.refcount, 1);
release();
data = m.data;
refcount = m.refcount;
elemsize = m.elemsize;
elempack = m.elempack;
allocator = m.allocator;
dims = m.dims;
w = m.w;
h = m.h;
d = m.d;
c = m.c;
return *this;
}
// wrap the host-mapped view of this device image as a CPU Mat (no copy)
// returns an empty Mat when the allocator is not mappable or the image memory
// has no mapped pointer
NCNN_FORCEINLINE Mat VkImageMat::mapped() const
{
if (!allocator->mappable || !data->mapped_ptr)
return Mat();
if (dims == 1)
return Mat(w, mapped_ptr(), elemsize, elempack, 0);
if (dims == 2)
return Mat(w, h, mapped_ptr(), elemsize, elempack, 0);
if (dims == 3)
return Mat(w, h, c, mapped_ptr(), elemsize, elempack, 0);
if (dims == 4)
return Mat(w, h, d, c, mapped_ptr(), elemsize, elempack, 0);
return Mat();
}
// raw host pointer to the image's mapped memory, adjusted by its bind offset;
// returns 0 when the allocator is not mappable or the memory is not mapped
NCNN_FORCEINLINE void* VkImageMat::mapped_ptr() const
{
if (!allocator->mappable || !data->mapped_ptr)
return 0;
return (unsigned char*)data->mapped_ptr + data->bind_offset;
}
// atomically increment the refcount (no-op for non-owning wrappers where refcount is 0)
NCNN_FORCEINLINE void VkImageMat::addref()
{
if (refcount)
NCNN_XADD(refcount, 1);
}
// atomically decrement the refcount; the thread that drops it to zero frees the
// device image through the allocator, then all fields are reset to the empty state
NCNN_FORCEINLINE void VkImageMat::release()
{
// NCNN_XADD returns the pre-decrement value, so == 1 means we were the last owner
if (refcount && NCNN_XADD(refcount, -1) == 1)
{
if (allocator && data)
{
allocator->fastFree(data);
}
}
data = 0;
elemsize = 0;
elempack = 0;
dims = 0;
w = 0;
h = 0;
d = 0;
c = 0;
refcount = 0;
}
// true when there is no image or no elements
NCNN_FORCEINLINE bool VkImageMat::empty() const
{
return data == 0 || total() == 0;
}
  1567. NCNN_FORCEINLINE size_t VkImageMat::total() const
  1568. {
  1569. return w * h * d * c;
  1570. }
// bits per scalar element (elemsize covers a whole pack); 0 when elempack is unset
NCNN_FORCEINLINE int VkImageMat::elembits() const
{
return elempack ? static_cast<int>(elemsize) * 8 / elempack : 0;
}
// describe the logical shape as an unallocated Mat (data is null)
// elempack is folded back into the last dimension to give unpacked extents
NCNN_FORCEINLINE Mat VkImageMat::shape() const
{
if (dims == 1)
return Mat(w * elempack, (void*)0);
if (dims == 2)
return Mat(w, h * elempack, (void*)0);
if (dims == 3)
return Mat(w, h, c * elempack, (void*)0);
if (dims == 4)
return Mat(w, h, d, c * elempack, (void*)0);
return Mat();
}
// underlying VkImage handle (data must be non-null)
NCNN_FORCEINLINE VkImage VkImageMat::image() const
{
return data->image;
}
// underlying VkImageView handle (data must be non-null)
NCNN_FORCEINLINE VkImageView VkImageMat::imageview() const
{
return data->imageview;
}
  1595. #endif // NCNN_VULKAN
  1596. } // namespace ncnn
  1597. #endif // NCNN_MAT_H