gpu.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360
  1. // Tencent is pleased to support the open source community by making ncnn available.
  2. //
  3. // Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
  4. //
  5. // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6. // in compliance with the License. You may obtain a copy of the License at
  7. //
  8. // https://opensource.org/licenses/BSD-3-Clause
  9. //
  10. // Unless required by applicable law or agreed to in writing, software distributed
  11. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  12. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  13. // specific language governing permissions and limitations under the License.
  14. #ifndef NCNN_GPU_H
  15. #define NCNN_GPU_H
  16. #include "platform.h"
  17. #if NCNN_VULKAN
  18. #include "mat.h"
  19. #include <vulkan/vulkan.h>
  20. #include "vulkan_header_fix.h"
  21. namespace ncnn {
  22. // instance
  23. NCNN_EXPORT int create_gpu_instance();
  24. NCNN_EXPORT void destroy_gpu_instance();
  25. // instance extension capability
  26. extern int support_VK_KHR_external_memory_capabilities;
  27. extern int support_VK_KHR_get_physical_device_properties2;
  28. extern int support_VK_KHR_get_surface_capabilities2;
  29. extern int support_VK_KHR_surface;
  30. extern int support_VK_EXT_debug_utils;
  31. #if __ANDROID_API__ >= 26
  32. extern int support_VK_KHR_android_surface;
  33. #endif // __ANDROID_API__ >= 26
  34. // VK_KHR_external_memory_capabilities
  35. extern PFN_vkGetPhysicalDeviceExternalBufferPropertiesKHR vkGetPhysicalDeviceExternalBufferPropertiesKHR;
  36. // VK_KHR_get_physical_device_properties2
  37. extern PFN_vkGetPhysicalDeviceFeatures2KHR vkGetPhysicalDeviceFeatures2KHR;
  38. extern PFN_vkGetPhysicalDeviceProperties2KHR vkGetPhysicalDeviceProperties2KHR;
  39. extern PFN_vkGetPhysicalDeviceFormatProperties2KHR vkGetPhysicalDeviceFormatProperties2KHR;
  40. extern PFN_vkGetPhysicalDeviceImageFormatProperties2KHR vkGetPhysicalDeviceImageFormatProperties2KHR;
  41. extern PFN_vkGetPhysicalDeviceQueueFamilyProperties2KHR vkGetPhysicalDeviceQueueFamilyProperties2KHR;
  42. extern PFN_vkGetPhysicalDeviceMemoryProperties2KHR vkGetPhysicalDeviceMemoryProperties2KHR;
  43. extern PFN_vkGetPhysicalDeviceSparseImageFormatProperties2KHR vkGetPhysicalDeviceSparseImageFormatProperties2KHR;
  44. // VK_KHR_get_surface_capabilities2
  45. extern PFN_vkGetPhysicalDeviceSurfaceCapabilities2KHR vkGetPhysicalDeviceSurfaceCapabilities2KHR;
  46. extern PFN_vkGetPhysicalDeviceSurfaceFormats2KHR vkGetPhysicalDeviceSurfaceFormats2KHR;
  47. // VK_KHR_surface
  48. extern PFN_vkDestroySurfaceKHR vkDestroySurfaceKHR;
  49. extern PFN_vkGetPhysicalDeviceSurfaceSupportKHR vkGetPhysicalDeviceSurfaceSupportKHR;
  50. extern PFN_vkGetPhysicalDeviceSurfaceCapabilitiesKHR vkGetPhysicalDeviceSurfaceCapabilitiesKHR;
  51. extern PFN_vkGetPhysicalDeviceSurfaceFormatsKHR vkGetPhysicalDeviceSurfaceFormatsKHR;
  52. extern PFN_vkGetPhysicalDeviceSurfacePresentModesKHR vkGetPhysicalDeviceSurfacePresentModesKHR;
  53. #if __ANDROID_API__ >= 26
  54. // VK_KHR_android_surface
  55. extern PFN_vkCreateAndroidSurfaceKHR vkCreateAndroidSurfaceKHR;
  56. #endif // __ANDROID_API__ >= 26
  57. // VK_NV_cooperative_matrix
  58. extern PFN_vkGetPhysicalDeviceCooperativeMatrixPropertiesNV vkGetPhysicalDeviceCooperativeMatrixPropertiesNV;
  59. // get info
  60. NCNN_EXPORT int get_gpu_count();
  61. NCNN_EXPORT int get_default_gpu_index();
  62. class GpuInfoPrivate;
  63. class NCNN_EXPORT GpuInfo
  64. {
  65. public:
  66. explicit GpuInfo();
  67. virtual ~GpuInfo();
  68. // vulkan physical device
  69. VkPhysicalDevice physical_device() const;
  70. // memory properties
  71. const VkPhysicalDeviceMemoryProperties& physical_device_memory_properties() const;
  72. // info
  73. uint32_t api_version() const;
  74. uint32_t driver_version() const;
  75. uint32_t vendor_id() const;
  76. uint32_t device_id() const;
  77. const char* device_name() const;
  78. uint8_t* pipeline_cache_uuid() const;
  79. // 0 = discrete gpu
  80. // 1 = integrated gpu
  81. // 2 = virtual gpu
  82. // 3 = cpu
  83. int type() const;
  84. // hardware limit
  85. uint32_t max_shared_memory_size() const;
  86. uint32_t max_workgroup_count_x() const;
  87. uint32_t max_workgroup_count_y() const;
  88. uint32_t max_workgroup_count_z() const;
  89. uint32_t max_workgroup_invocations() const;
  90. uint32_t max_workgroup_size_x() const;
  91. uint32_t max_workgroup_size_y() const;
  92. uint32_t max_workgroup_size_z() const;
  93. size_t memory_map_alignment() const;
  94. size_t buffer_offset_alignment() const;
  95. size_t non_coherent_atom_size() const;
  96. size_t buffer_image_granularity() const;
  97. uint32_t max_image_dimension_1d() const;
  98. uint32_t max_image_dimension_2d() const;
  99. uint32_t max_image_dimension_3d() const;
  100. float timestamp_period() const;
  101. // runtime
  102. uint32_t compute_queue_family_index() const;
  103. uint32_t graphics_queue_family_index() const;
  104. uint32_t transfer_queue_family_index() const;
  105. uint32_t compute_queue_count() const;
  106. uint32_t graphics_queue_count() const;
  107. uint32_t transfer_queue_count() const;
  108. // property
  109. bool unified_compute_transfer_queue() const;
  110. // subgroup
  111. uint32_t subgroup_size() const;
  112. bool support_subgroup_basic() const;
  113. bool support_subgroup_vote() const;
  114. bool support_subgroup_ballot() const;
  115. bool support_subgroup_shuffle() const;
  116. // bug is not feature
  117. bool bug_storage_buffer_no_l1() const;
  118. bool bug_corrupted_online_pipeline_cache() const;
  119. bool bug_buffer_image_load_zero() const;
  120. // but sometimes bug is a feature
  121. bool bug_implicit_fp16_arithmetic() const;
  122. // fp16 and int8 feature
  123. bool support_fp16_packed() const;
  124. bool support_fp16_storage() const;
  125. bool support_fp16_arithmetic() const;
  126. bool support_int8_packed() const;
  127. bool support_int8_storage() const;
  128. bool support_int8_arithmetic() const;
  129. // ycbcr conversion feature
  130. bool support_ycbcr_conversion() const;
  131. // cooperative matrix feature
  132. bool support_cooperative_matrix() const;
  133. bool support_cooperative_matrix_16_8_8() const;
  134. // extension capability
  135. int support_VK_KHR_8bit_storage() const;
  136. int support_VK_KHR_16bit_storage() const;
  137. int support_VK_KHR_bind_memory2() const;
  138. int support_VK_KHR_create_renderpass2() const;
  139. int support_VK_KHR_dedicated_allocation() const;
  140. int support_VK_KHR_descriptor_update_template() const;
  141. int support_VK_KHR_external_memory() const;
  142. int support_VK_KHR_get_memory_requirements2() const;
  143. int support_VK_KHR_maintenance1() const;
  144. int support_VK_KHR_maintenance2() const;
  145. int support_VK_KHR_maintenance3() const;
  146. int support_VK_KHR_multiview() const;
  147. int support_VK_KHR_portability_subset() const;
  148. int support_VK_KHR_push_descriptor() const;
  149. int support_VK_KHR_sampler_ycbcr_conversion() const;
  150. int support_VK_KHR_shader_float16_int8() const;
  151. int support_VK_KHR_shader_float_controls() const;
  152. int support_VK_KHR_storage_buffer_storage_class() const;
  153. int support_VK_KHR_swapchain() const;
  154. int support_VK_EXT_descriptor_indexing() const;
  155. int support_VK_EXT_memory_budget() const;
  156. int support_VK_EXT_queue_family_foreign() const;
  157. #if __ANDROID_API__ >= 26
  158. int support_VK_ANDROID_external_memory_android_hardware_buffer() const;
  159. #endif // __ANDROID_API__ >= 26
  160. int support_VK_NV_cooperative_matrix() const;
  161. private:
  162. GpuInfo(const GpuInfo&);
  163. GpuInfo& operator=(const GpuInfo&);
  164. private:
  165. friend int create_gpu_instance();
  166. GpuInfoPrivate* const d;
  167. };
  168. NCNN_EXPORT const GpuInfo& get_gpu_info(int device_index = get_default_gpu_index());
  169. class VkAllocator;
  170. class VkCompute;
  171. class Option;
  172. class PipelineCache;
  173. class VulkanDevicePrivate;
  174. class NCNN_EXPORT VulkanDevice
  175. {
  176. public:
  177. VulkanDevice(int device_index = get_default_gpu_index());
  178. ~VulkanDevice();
  179. const GpuInfo& info;
  180. VkDevice vkdevice() const;
  181. VkShaderModule compile_shader_module(const uint32_t* spv_data, size_t spv_data_size) const;
  182. // with fixed workgroup size
  183. VkShaderModule compile_shader_module(const uint32_t* spv_data, size_t spv_data_size, uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z) const;
  184. // helper for creating pipeline
  185. int create_descriptorset_layout(int binding_count, const int* binding_types, VkDescriptorSetLayout* descriptorset_layout) const;
  186. int create_pipeline_layout(int push_constant_count, VkDescriptorSetLayout descriptorset_layout, VkPipelineLayout* pipeline_layout) const;
  187. int create_pipeline(VkShaderModule shader_module, VkPipelineLayout pipeline_layout, const std::vector<vk_specialization_type>& specializations, VkPipeline* pipeline) const;
  188. int create_descriptor_update_template(int binding_count, const int* binding_types, VkDescriptorSetLayout descriptorset_layout, VkPipelineLayout pipeline_layout, VkDescriptorUpdateTemplateKHR* descriptor_update_template) const;
  189. uint32_t find_memory_index(uint32_t memory_type_bits, VkFlags required, VkFlags preferred, VkFlags preferred_not) const;
  190. bool is_mappable(uint32_t memory_type_index) const;
  191. bool is_coherent(uint32_t memory_type_index) const;
  192. VkQueue acquire_queue(uint32_t queue_family_index) const;
  193. void reclaim_queue(uint32_t queue_family_index, VkQueue queue) const;
  194. // allocator on this device
  195. VkAllocator* acquire_blob_allocator() const;
  196. void reclaim_blob_allocator(VkAllocator* allocator) const;
  197. VkAllocator* acquire_staging_allocator() const;
  198. void reclaim_staging_allocator(VkAllocator* allocator) const;
  199. // immutable sampler for texelfetch
  200. const VkSampler* immutable_texelfetch_sampler() const;
  201. // dummy buffer image
  202. VkMat get_dummy_buffer() const;
  203. VkImageMat get_dummy_image() const;
  204. VkImageMat get_dummy_image_readonly() const;
  205. // pipeline cache on this device
  206. const PipelineCache* get_pipeline_cache() const;
  207. // test image allocation
  208. bool shape_support_image_storage(const Mat& shape) const;
  209. // current gpu heap memory budget in MB
  210. uint32_t get_heap_budget() const;
  211. // utility operator
  212. void convert_packing(const VkMat& src, VkMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const;
  213. void convert_packing(const VkImageMat& src, VkImageMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const;
  214. void convert_packing(const VkMat& src, VkImageMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const;
  215. void convert_packing(const VkImageMat& src, VkMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const;
  216. // VK_KHR_bind_memory2
  217. PFN_vkBindBufferMemory2KHR vkBindBufferMemory2KHR;
  218. PFN_vkBindImageMemory2KHR vkBindImageMemory2KHR;
  219. // VK_KHR_create_renderpass2
  220. PFN_vkCmdBeginRenderPass2KHR vkCmdBeginRenderPass2KHR;
  221. PFN_vkCmdEndRenderPass2KHR vkCmdEndRenderPass2KHR;
  222. PFN_vkCmdNextSubpass2KHR vkCmdNextSubpass2KHR;
  223. PFN_vkCreateRenderPass2KHR vkCreateRenderPass2KHR;
  224. // VK_KHR_descriptor_update_template
  225. PFN_vkCreateDescriptorUpdateTemplateKHR vkCreateDescriptorUpdateTemplateKHR;
  226. PFN_vkDestroyDescriptorUpdateTemplateKHR vkDestroyDescriptorUpdateTemplateKHR;
  227. PFN_vkUpdateDescriptorSetWithTemplateKHR vkUpdateDescriptorSetWithTemplateKHR;
  228. // VK_KHR_get_memory_requirements2
  229. PFN_vkGetImageMemoryRequirements2KHR vkGetImageMemoryRequirements2KHR;
  230. PFN_vkGetBufferMemoryRequirements2KHR vkGetBufferMemoryRequirements2KHR;
  231. PFN_vkGetImageSparseMemoryRequirements2KHR vkGetImageSparseMemoryRequirements2KHR;
  232. // VK_KHR_maintenance1
  233. PFN_vkTrimCommandPoolKHR vkTrimCommandPoolKHR;
  234. // VK_KHR_maintenance3
  235. PFN_vkGetDescriptorSetLayoutSupportKHR vkGetDescriptorSetLayoutSupportKHR;
  236. // VK_KHR_push_descriptor
  237. PFN_vkCmdPushDescriptorSetWithTemplateKHR vkCmdPushDescriptorSetWithTemplateKHR;
  238. PFN_vkCmdPushDescriptorSetKHR vkCmdPushDescriptorSetKHR;
  239. // VK_KHR_sampler_ycbcr_conversion
  240. PFN_vkCreateSamplerYcbcrConversionKHR vkCreateSamplerYcbcrConversionKHR;
  241. PFN_vkDestroySamplerYcbcrConversionKHR vkDestroySamplerYcbcrConversionKHR;
  242. // VK_KHR_swapchain
  243. PFN_vkCreateSwapchainKHR vkCreateSwapchainKHR;
  244. PFN_vkDestroySwapchainKHR vkDestroySwapchainKHR;
  245. PFN_vkGetSwapchainImagesKHR vkGetSwapchainImagesKHR;
  246. PFN_vkAcquireNextImageKHR vkAcquireNextImageKHR;
  247. PFN_vkQueuePresentKHR vkQueuePresentKHR;
  248. #if __ANDROID_API__ >= 26
  249. // VK_ANDROID_external_memory_android_hardware_buffer
  250. PFN_vkGetAndroidHardwareBufferPropertiesANDROID vkGetAndroidHardwareBufferPropertiesANDROID;
  251. PFN_vkGetMemoryAndroidHardwareBufferANDROID vkGetMemoryAndroidHardwareBufferANDROID;
  252. #endif // __ANDROID_API__ >= 26
  253. protected:
  254. // device extension
  255. int init_device_extension();
  256. private:
  257. VulkanDevice(const VulkanDevice&);
  258. VulkanDevice& operator=(const VulkanDevice&);
  259. private:
  260. VulkanDevicePrivate* const d;
  261. };
  262. NCNN_EXPORT VulkanDevice* get_gpu_device(int device_index = get_default_gpu_index());
  263. // online spirv compilation
  264. NCNN_EXPORT int compile_spirv_module(const char* comp_string, const Option& opt, std::vector<uint32_t>& spirv);
  265. NCNN_EXPORT int compile_spirv_module(const char* comp_data, int comp_data_size, const Option& opt, std::vector<uint32_t>& spirv);
  266. NCNN_EXPORT int compile_spirv_module(int shader_type_index, const Option& opt, std::vector<uint32_t>& spirv);
  267. // info from spirv
  268. class NCNN_EXPORT ShaderInfo
  269. {
  270. public:
  271. int specialization_count;
  272. int binding_count;
  273. int push_constant_count;
  274. // 0 = null
  275. // 1 = storage buffer
  276. // 2 = storage image
  277. // 3 = combined image sampler
  278. int binding_types[16]; // 16 is large enough I think ...
  279. int reserved_0;
  280. int reserved_1;
  281. int reserved_2;
  282. int reserved_3;
  283. };
  284. NCNN_EXPORT int resolve_shader_info(const uint32_t* spv_data, size_t spv_data_size, ShaderInfo& shader_info);
  285. } // namespace ncnn
  286. #endif // NCNN_VULKAN
  287. #endif // NCNN_GPU_H