// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
  14. #ifndef NCNN_OPTION_H
  15. #define NCNN_OPTION_H
  16. #include "platform.h"
  17. namespace ncnn {
  18. #if NCNN_VULKAN
  19. class VkAllocator;
  20. class PipelineCache;
  21. #endif // NCNN_VULKAN
  22. class Allocator;
  23. class NCNN_EXPORT Option
  24. {
  25. public:
  26. // default option
  27. Option();
  28. public:
  29. // light mode
  30. // intermediate blob will be recycled when enabled
  31. // enabled by default
  32. bool lightmode;
  33. // thread count
  34. // default value is the one returned by get_cpu_count()
  35. int num_threads;
  36. // blob memory allocator
  37. Allocator* blob_allocator;
  38. // workspace memory allocator
  39. Allocator* workspace_allocator;
  40. #if NCNN_VULKAN
  41. // blob memory allocator
  42. VkAllocator* blob_vkallocator;
  43. // workspace memory allocator
  44. VkAllocator* workspace_vkallocator;
  45. // staging memory allocator
  46. VkAllocator* staging_vkallocator;
  47. // pipeline cache
  48. PipelineCache* pipeline_cache;
  49. #endif // NCNN_VULKAN
  50. // the time openmp threads busy-wait for more work before going to sleep
  51. // default value is 20ms to keep the cores enabled
  52. // without too much extra power consumption afterwards
  53. int openmp_blocktime;
  54. // enable winograd convolution optimization
  55. // improve convolution 3x3 stride1 performance, may consume more memory
  56. // changes should be applied before loading network structure and weight
  57. // enabled by default
  58. bool use_winograd_convolution;
  59. // enable sgemm convolution optimization
  60. // improve convolution 1x1 stride1 performance, may consume more memory
  61. // changes should be applied before loading network structure and weight
  62. // enabled by default
  63. bool use_sgemm_convolution;
  64. // enable quantized int8 inference
  65. // use low-precision int8 path for quantized model
  66. // changes should be applied before loading network structure and weight
  67. // enabled by default
  68. bool use_int8_inference;
  69. // enable vulkan compute
  70. bool use_vulkan_compute;
  71. // enable bf16 data type for storage
  72. // improve most operator performance on all arm devices, may consume more memory
  73. bool use_bf16_storage;
  74. // enable options for gpu inference
  75. bool use_fp16_packed;
  76. bool use_fp16_storage;
  77. bool use_fp16_arithmetic;
  78. bool use_int8_packed;
  79. bool use_int8_storage;
  80. bool use_int8_arithmetic;
  81. // enable simd-friendly packed memory layout
  82. // improve all operator performance on all arm devices, will consume more memory
  83. // changes should be applied before loading network structure and weight
  84. // enabled by default
  85. bool use_packing_layout;
  86. bool use_shader_pack8;
  87. // subgroup option
  88. bool use_subgroup_basic;
  89. bool use_subgroup_vote;
  90. bool use_subgroup_ballot;
  91. bool use_subgroup_shuffle;
  92. // turn on for adreno
  93. bool use_image_storage;
  94. bool use_tensor_storage;
  95. bool use_reserved_0;
  96. // enable DAZ(Denormals-Are-Zero) and FTZ(Flush-To-Zero)
  97. // default value is 3
  98. // 0 = DAZ OFF, FTZ OFF
  99. // 1 = DAZ ON , FTZ OFF
  100. // 2 = DAZ OFF, FTZ ON
  101. // 3 = DAZ ON, FTZ ON
  102. int flush_denormals;
  103. bool use_local_pool_allocator;
  104. // enable local memory optimization for gpu inference
  105. bool use_shader_local_memory;
  106. // enable cooperative matrix optimization for gpu inference
  107. bool use_cooperative_matrix;
  108. // more fine-grained control of winograd convolution
  109. bool use_winograd23_convolution;
  110. bool use_winograd43_convolution;
  111. bool use_winograd63_convolution;
  112. bool use_reserved_6;
  113. bool use_reserved_7;
  114. bool use_reserved_8;
  115. bool use_reserved_9;
  116. bool use_reserved_10;
  117. bool use_reserved_11;
  118. };
  119. } // namespace ncnn
  120. #endif // NCNN_OPTION_H