quantize.h 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109
  1. /* Copyright 2019-2020 Canaan Inc.
  2. *
  3. * Licensed under the Apache License, Version 2.0 (the "License");
  4. * you may not use this file except in compliance with the License.
  5. * You may obtain a copy of the License at
  6. *
  7. * http://www.apache.org/licenses/LICENSE-2.0
  8. *
  9. * Unless required by applicable law or agreed to in writing, software
  10. * distributed under the License is distributed on an "AS IS" BASIS,
  11. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. * See the License for the specific language governing permissions and
  13. * limitations under the License.
  14. */
  15. #pragma once
  16. #include "datatypes.h"
  17. #include <cassert>
  18. #include <cmath>
  19. #include <limits>
  20. namespace nncase
  21. {
  22. namespace quant
  23. {
  24. template <class TIt>
  25. value_range<float> get_range(TIt begin, TIt end)
  26. {
  27. float min = std::numeric_limits<float>::max();
  28. float max = std::numeric_limits<float>::min();
  29. while (begin != end)
  30. {
  31. auto value = *begin++;
  32. auto fc = std::fpclassify(value);
  33. if (fc == FP_NORMAL || fc == FP_SUBNORMAL || fc == FP_ZERO)
  34. {
  35. min = std::min(min, value);
  36. max = std::max(max, value);
  37. }
  38. }
  39. return { min, max };
  40. }
  41. inline value_range<float> fixup_range(value_range<float> range)
  42. {
  43. if (range.min < -1e3)
  44. range.min = -1e3;
  45. if (range.max > 1e3)
  46. range.max = 1e3;
  47. auto r = range.max - range.min;
  48. if (r == 0)
  49. r = 0.1f;
  50. else if (r < 0.01f)
  51. r = 0.01f;
  52. range.max = range.min + r;
  53. if (range.max < 0)
  54. range.max = 0;
  55. if (range.min > 0)
  56. range.min = 0;
  57. return range;
  58. }
  59. inline quant_param_t get_quant_param(value_range<float> range, int32_t bits)
  60. {
  61. range = fixup_range(range);
  62. auto r = range.max - range.min;
  63. auto scale = ((1LL << bits) - 1) / r;
  64. auto bias = std::round(-range.min * scale);
  65. assert(bias >= 0);
  66. return { static_cast<int32_t>(bias), scale };
  67. }
  68. inline fixed_mul get_fixed_mul(float value, int32_t max_bits, uint8_t max_shift, bool is_signed)
  69. {
  70. assert(!is_signed || value >= 0);
  71. auto bits = is_signed ? max_bits - 1 : max_bits;
  72. int32_t shift = 0;
  73. float mul = 0;
  74. if (std::abs(value) > 1)
  75. {
  76. int mul_shift;
  77. mul = std::frexp(value, &mul_shift);
  78. shift = std::min((int32_t)max_shift, bits - mul_shift);
  79. mul = mul * std::pow(2.f, shift + mul_shift);
  80. }
  81. else if (value == 0)
  82. {
  83. mul = 0;
  84. shift = 0;
  85. }
  86. else
  87. {
  88. int mul_shift;
  89. mul = std::frexp(value, &mul_shift);
  90. shift = std::min(max_shift + mul_shift, bits);
  91. mul = mul * std::pow(2.f, shift);
  92. shift -= mul_shift;
  93. }
  94. assert(std::abs(mul) < std::pow(2, bits));
  95. assert(shift >= 0 && shift <= max_shift);
  96. assert(std::abs(value - mul * std::pow(2, -shift)) <= std::numeric_limits<float>::epsilon());
  97. return { mul, static_cast<int8_t>(shift) };
  98. }
  99. }
  100. }