kernel_util.cc

/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/kernel_util.h"

#include <algorithm>
#include <cmath>
#include <limits>
#include <memory>

#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
namespace tflite {

// Per-axis
TfLiteStatus PopulateConvolutionQuantizationParams(
    TfLiteContext* context, const TfLiteTensor* input,
    const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output,
    const TfLiteFusedActivation& activation, int32_t* multiplier, int* shift,
    int32_t* output_activation_min, int32_t* output_activation_max,
    int32_t* per_channel_multiplier, int* per_channel_shift) {
  const auto* affine_quantization =
      reinterpret_cast<TfLiteAffineQuantization*>(filter->quantization.params);
  return PopulateConvolutionQuantizationParams(
      context, input, filter, bias, output, activation, multiplier, shift,
      output_activation_min, output_activation_max, per_channel_multiplier,
      per_channel_shift, affine_quantization->scale->size);
}
// Per-axis & per-tensor
TfLiteStatus PopulateConvolutionQuantizationParams(
    TfLiteContext* context, const TfLiteTensor* input,
    const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output,
    const TfLiteFusedActivation& activation, int32_t* multiplier, int* shift,
    int32_t* output_activation_min, int32_t* output_activation_max,
    int32_t* per_channel_multiplier, int* per_channel_shift, int num_channels) {
  TF_LITE_ENSURE_EQ(context, input->quantization.type,
                    kTfLiteAffineQuantization);
  TF_LITE_ENSURE_EQ(context, filter->quantization.type,
                    kTfLiteAffineQuantization);
  // TODO(jianlijianli): Enable bias type check and bias scale == input scale
  // * filter scale for each channel in affine quantization once bias
  // quantization is properly populated.
  // TF_LITE_ENSURE_EQ(context, bias->quantization.type,
  //                   kTfLiteAffineQuantization);

  // Check data type.
  const auto* affine_quantization =
      reinterpret_cast<TfLiteAffineQuantization*>(filter->quantization.params);
  TF_LITE_ENSURE(context, affine_quantization);
  TF_LITE_ENSURE(context, affine_quantization->scale);
  const bool is_per_channel = affine_quantization->scale->size > 1;
  if (is_per_channel) {
    // Currently only Int8/Int16 is supported for per channel quantization.
    TF_LITE_ENSURE(context,
                   input->type == kTfLiteInt8 || input->type == kTfLiteInt16);
    TF_LITE_ENSURE_EQ(context, filter->type, kTfLiteInt8);
    TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size, num_channels);
    TF_LITE_ENSURE_EQ(
        context, num_channels,
        filter->dims->data[affine_quantization->quantized_dimension]);
  }

  // Populate multiplier and shift using affine quantization.
  const float input_scale = input->params.scale;
  const float output_scale = output->params.scale;
  const float* filter_scales = affine_quantization->scale->data;
  for (int i = 0; i < num_channels; ++i) {
    // If per-tensor quantization parameter is specified, broadcast it along
    // the quantization dimension (channels_out).
    const float scale = is_per_channel ? filter_scales[i] : filter_scales[0];
    const double filter_scale = static_cast<double>(scale);
    const double effective_output_scale = static_cast<double>(input_scale) *
                                          filter_scale /
                                          static_cast<double>(output_scale);
    int32_t significand;
    int channel_shift;
    QuantizeMultiplier(effective_output_scale, &significand, &channel_shift);
    per_channel_multiplier[i] = significand;
    per_channel_shift[i] = channel_shift;
  }

  // Populate scalar quantization parameters.
  // This check on legacy quantization parameters is kept only for backward
  // compatibility.
  if (input->type == kTfLiteUInt8) {
    // Check bias scale == input scale * filter scale.
    double real_multiplier = 0.0;
    TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler(
        context, input, filter, bias, output, &real_multiplier));

    int exponent;
    // Populate quantization parameters with multiplier and shift.
    QuantizeMultiplier(real_multiplier, multiplier, &exponent);
    *shift = -exponent;
  }
  if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8 ||
      input->type == kTfLiteInt16) {
    TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
        context, activation, output, output_activation_min,
        output_activation_max));
  }
  return kTfLiteOk;
}
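// Computes the real multiplier for a quantized convolution as
// input_scale * filter_scale / output_scale. When a bias tensor is present,
// first verifies that its scale is close enough to input_scale * filter_scale
// that folding the bias into the accumulator introduces negligible error.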
TfLiteStatus GetQuantizedConvolutionMultipler(TfLiteContext* context,
                                              const TfLiteTensor* input,
                                              const TfLiteTensor* filter,
                                              const TfLiteTensor* bias,
                                              TfLiteTensor* output,
                                              double* multiplier) {
  const double input_product_scale = static_cast<double>(input->params.scale) *
                                     static_cast<double>(filter->params.scale);
  // TODO(ahentz): The following conditions must be guaranteed by the training
  // pipeline.
  if (bias) {
    const double bias_scale = static_cast<double>(bias->params.scale);
    // Here we're making sure the input_product_scale & bias_scale are about
    // the same. Since we have:
    // (output - output_zp) * output_scale =
    //     input_product_scale * input_product + bias * bias_scale ---- (0)
    //
    // (0) equals:
    //     (input_product + bias) * input_product_scale ----- (1)
    //            +
    //     bias * (bias_scale - input_product_scale)   ------ (2)
    //
    // For the real kernel computation, we're doing (1), so we really need to
    // make sure (2) has minimum impact on the output, so:
    // bias * (bias_scale - input_product_scale) / output_scale should be
    // a small number for an integer.
    // Since normally bias should be within a small range,
    // we should expect (bias_scale - input_product_scale) / output_scale to
    // be a small number like 0.02.
    const double scale_diff = std::abs(input_product_scale - bias_scale);
    const double output_scale = static_cast<double>(output->params.scale);

    TF_LITE_ENSURE(context, scale_diff / output_scale <= 0.02);
  }
  return GetQuantizedConvolutionMultipler(context, input, filter, output,
                                          multiplier);
}
TfLiteStatus GetQuantizedConvolutionMultipler(TfLiteContext* context,
                                              const TfLiteTensor* input,
                                              const TfLiteTensor* filter,
                                              TfLiteTensor* output,
                                              double* multiplier) {
  const double input_product_scale =
      static_cast<double>(input->params.scale * filter->params.scale);
  TF_LITE_ENSURE(context, input_product_scale >= 0);
  *multiplier = input_product_scale /
                static_cast<double>(output->params.scale);
  return kTfLiteOk;
}

namespace {
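
// Narrows the representable range [qmin, qmax] to the fused activation's
// bounds, quantizing 0, 6, and +/-1 with the output tensor's scale and
// zero point. Activations outside the ReLU family keep the full range.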
void CalculateActivationRangeQuantizedImpl(TfLiteFusedActivation activation,
                                           int32_t qmin, int32_t qmax,
                                           TfLiteTensor* output,
                                           int32_t* act_min, int32_t* act_max) {
  const auto scale = output->params.scale;
  const auto zero_point = output->params.zero_point;

  auto quantize = [scale, zero_point](float f) {
    return zero_point + static_cast<int32_t>(TfLiteRound(f / scale));
  };

  if (activation == kTfLiteActRelu) {
    *act_min = std::max(qmin, quantize(0.0));
    *act_max = qmax;
  } else if (activation == kTfLiteActRelu6) {
    *act_min = std::max(qmin, quantize(0.0));
    *act_max = std::min(qmax, quantize(6.0));
  } else if (activation == kTfLiteActRelu1) {
    *act_min = std::max(qmin, quantize(-1.0));
    *act_max = std::min(qmax, quantize(1.0));
  } else {
    *act_min = qmin;
    *act_max = qmax;
  }
}

}  // namespace
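
// Picks the numeric limits of the quantized output type (uint8/int8/int16),
// then clamps them according to the fused activation. Any other output type
// is rejected via TF_LITE_ENSURE.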
TfLiteStatus CalculateActivationRangeQuantized(TfLiteContext* context,
                                               TfLiteFusedActivation activation,
                                               TfLiteTensor* output,
                                               int32_t* act_min,
                                               int32_t* act_max) {
  int32_t qmin = 0;
  int32_t qmax = 0;
  if (output->type == kTfLiteUInt8) {
    qmin = std::numeric_limits<uint8_t>::min();
    qmax = std::numeric_limits<uint8_t>::max();
  } else if (output->type == kTfLiteInt8) {
    qmin = std::numeric_limits<int8_t>::min();
    qmax = std::numeric_limits<int8_t>::max();
  } else if (output->type == kTfLiteInt16) {
    qmin = std::numeric_limits<int16_t>::min();
    qmax = std::numeric_limits<int16_t>::max();
  } else {
    TF_LITE_ENSURE(context, false);
  }

  CalculateActivationRangeQuantizedImpl(activation, qmin, qmax, output,
                                        act_min, act_max);
  return kTfLiteOk;
}
bool HaveSameShapes(const TfLiteTensor* input1, const TfLiteTensor* input2) {
  return TfLiteIntArrayEqual(input1->dims, input2->dims);
}

// TODO(petewarden): Having macros around this is ugly, look at other
// strategies before replicating this approach elsewhere.
#ifndef TF_LITE_STATIC_MEMORY
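// Computes the broadcast output shape of two tensors, numpy-style: dimensions
// are aligned from the trailing end, each pair must be equal or contain a 1,
// and the larger extent is taken. A zero-element input1 short-circuits to a
// copy of its own shape.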
TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context,
                                        const TfLiteTensor* input1,
                                        const TfLiteTensor* input2,
                                        TfLiteIntArray** output_shape) {
  int dims1 = NumDimensions(input1);
  int dims2 = NumDimensions(input2);
  int out_dims = std::max(dims1, dims2);
  if (NumElements(input1) == 0) {
    *output_shape = TfLiteIntArrayCopy(input1->dims);
    return kTfLiteOk;
  }
  std::unique_ptr<TfLiteIntArray, void (*)(TfLiteIntArray*)> shape(
      TfLiteIntArrayCreate(out_dims), TfLiteIntArrayFree);
  for (int i = 0; i < out_dims; ++i) {
    int d1 = i >= dims1 ? 1 : SizeOfDimension(input1, dims1 - i - 1);
    int d2 = i >= dims2 ? 1 : SizeOfDimension(input2, dims2 - i - 1);
    TF_LITE_ENSURE(context, d1 == d2 || d1 == 1 || d2 == 1);
    shape->data[out_dims - i - 1] = std::max(d1, d2);
  }
  *output_shape = shape.release();
  return kTfLiteOk;
}
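
// Three-input variant of the broadcast shape computation: each dimension must
// be either 1 or equal to the largest extent among the three inputs at that
// position.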
TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context,
                                        const TfLiteTensor* input1,
                                        const TfLiteTensor* input2,
                                        const TfLiteTensor* input3,
                                        TfLiteIntArray** output_shape) {
  int dims1 = NumDimensions(input1);
  int dims2 = NumDimensions(input2);
  int dims3 = NumDimensions(input3);
  int out_dims = std::max(std::max(dims1, dims2), dims3);
  std::unique_ptr<TfLiteIntArray, void (*)(TfLiteIntArray*)> shape(
      TfLiteIntArrayCreate(out_dims), TfLiteIntArrayFree);
  for (int i = 0; i < out_dims; ++i) {
    int d1 = i >= dims1 ? 1 : SizeOfDimension(input1, dims1 - i - 1);
    int d2 = i >= dims2 ? 1 : SizeOfDimension(input2, dims2 - i - 1);
    int d3 = i >= dims3 ? 1 : SizeOfDimension(input3, dims3 - i - 1);
    int max_value = std::max(std::max(d1, d2), d3);
    TF_LITE_ENSURE(context, d1 == 1 || d1 == max_value);
    TF_LITE_ENSURE(context, d2 == 1 || d2 == max_value);
    TF_LITE_ENSURE(context, d3 == 1 || d3 == max_value);
    shape->data[out_dims - i - 1] = max_value;
  }
  *output_shape = shape.release();
  return kTfLiteOk;
}
#endif  // TF_LITE_STATIC_MEMORY

}  // namespace tflite