/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SOFTMAX_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SOFTMAX_H_

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>
#include <vector>

#include "fixedpoint/fixedpoint.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/op_macros.h"

namespace tflite {
namespace reference_ops {
inline void Softmax(const SoftmaxParams& params,
                    const RuntimeShape& input_shape, const float* input_data,
                    const RuntimeShape& output_shape, float* output_data) {
  const int trailing_dim = input_shape.DimensionsCount() - 1;
  const int outer_size =
      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
  const int depth =
      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);

  for (int i = 0; i < outer_size; ++i) {
    // Find the max element value, which we subtract from each input to ensure
    // numerical stability, taking advantage of the identity:
    // exp(x[i]) / sum(exp(x[i])) == exp(x[i] + C) / sum(exp(x[i] + C))
    float max = std::numeric_limits<float>::lowest();
    for (int c = 0; c < depth; ++c) {
      max = std::max(max, input_data[i * depth + c]);
    }

    // Compute sum.
    float sum = 0.f;
    for (int c = 0; c < depth; ++c) {
      sum += std::exp((input_data[i * depth + c] - max) *
                      static_cast<float>(params.beta));
    }

    // Compute result.
    for (int c = 0; c < depth; ++c) {
      output_data[i * depth + c] = std::exp((input_data[i * depth + c] - max) *
                                            static_cast<float>(params.beta)) /
                                   sum;
    }
  }
}
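
// Illustrative usage sketch (not part of the library API): one possible way
// to call the float Softmax above on a hypothetical [2, 3] tensor with
// beta == 1.0. Buffer contents and shapes here are made up for illustration.
inline void ExampleFloatSoftmaxUsage() {
  const RuntimeShape shape({2, 3});
  const float input[6] = {1.f, 2.f, 3.f, 0.f, 0.f, 0.f};
  float output[6];
  SoftmaxParams params;
  params.beta = 1.0;  // Plain softmax; larger beta sharpens the distribution.
  Softmax(params, shape, input, shape, output);
  // Each row of `output` now sums to ~1.0; the second row is ~1/3 per entry.
}
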
// Quantized softmax with int8_t/uint8_t input and int8_t/uint8_t/int16_t
// output.
template <typename InputT, typename OutputT>
inline void Softmax(const SoftmaxParams& params,
                    const RuntimeShape& input_shape, const InputT* input_data,
                    const RuntimeShape& output_shape, OutputT* output_data) {
  const int32_t input_beta_multiplier = params.input_multiplier;
  const int32_t input_beta_left_shift = params.input_left_shift;
  const int diff_min = params.diff_min;
  // The representation chosen for the input to the exp() function is Q5.26.
  // We need to leave extra space since values that we skip might be as large
  // as -32 before multiplying by input_beta_multiplier, and therefore as
  // large as -16 afterwards. Note that exp(-8) is definitely not
  // insignificant to accumulation, but exp(-16) definitely is.
  static const int kScaledDiffIntegerBits = 5;
  static const int kAccumulationIntegerBits = 12;
  using FixedPointScaledDiff =
      gemmlowp::FixedPoint<int32_t, kScaledDiffIntegerBits>;
  using FixedPointAccum =
      gemmlowp::FixedPoint<int32_t, kAccumulationIntegerBits>;
  using FixedPoint0 = gemmlowp::FixedPoint<int32_t, 0>;
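  // With these aliases, a raw int32 value r represents r / 2^26 in
  // FixedPointScaledDiff (Q5.26), r / 2^19 in FixedPointAccum (Q12.19), and
  // r / 2^31 in FixedPoint0 (Q0.31).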
  const int trailing_dim = input_shape.DimensionsCount() - 1;
  const int outer_size =
      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
  const int depth =
      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);

  for (int i = 0; i < outer_size; ++i) {
    InputT max_in_row = std::numeric_limits<InputT>::min();
    for (int c = 0; c < depth; ++c) {
      max_in_row = std::max(max_in_row, input_data[i * depth + c]);
    }

    FixedPointAccum sum_of_exps = FixedPointAccum::Zero();
    for (int c = 0; c < depth; ++c) {
      int32_t input_diff =
          static_cast<int32_t>(input_data[i * depth + c]) - max_in_row;
      if (input_diff >= diff_min) {
        const int32_t input_diff_rescaled =
            MultiplyByQuantizedMultiplierGreaterThanOne(
                input_diff, input_beta_multiplier, input_beta_left_shift);
        const FixedPointScaledDiff scaled_diff_f8 =
            FixedPointScaledDiff::FromRaw(input_diff_rescaled);
        sum_of_exps =
            sum_of_exps + gemmlowp::Rescale<kAccumulationIntegerBits>(
                              exp_on_negative_values(scaled_diff_f8));
      }
    }

    int num_bits_over_unit;
    FixedPoint0 shifted_scale = FixedPoint0::FromRaw(GetReciprocal(
        sum_of_exps.raw(), kAccumulationIntegerBits, &num_bits_over_unit));

    for (int c = 0; c < depth; ++c) {
      int32_t input_diff =
          static_cast<int32_t>(input_data[i * depth + c]) - max_in_row;
      if (input_diff >= diff_min) {
        const int32_t input_diff_rescaled =
            MultiplyByQuantizedMultiplierGreaterThanOne(
                input_diff, input_beta_multiplier, input_beta_left_shift);
        const FixedPointScaledDiff scaled_diff_f8 =
            FixedPointScaledDiff::FromRaw(input_diff_rescaled);

        FixedPoint0 exp_in_0 = exp_on_negative_values(scaled_diff_f8);
        int32_t unsat_output = gemmlowp::RoundingDivideByPOT(
            (shifted_scale * exp_in_0).raw(),
            num_bits_over_unit + 31 - (sizeof(OutputT) * 8));

        const int32_t shifted_output =
            unsat_output +
            static_cast<int32_t>(std::numeric_limits<OutputT>::min());

        output_data[i * depth + c] = static_cast<OutputT>(std::max(
            std::min(shifted_output,
                     static_cast<int32_t>(std::numeric_limits<OutputT>::max())),
            static_cast<int32_t>(std::numeric_limits<OutputT>::min())));
      } else {
        output_data[i * depth + c] = std::numeric_limits<OutputT>::min();
      }
    }
  }
}
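
// Illustrative usage sketch (not part of the library API): one possible
// Prepare-style parameter setup for the quantized Softmax above with int8_t
// input and output. The input scale, beta and tensor contents are
// hypothetical; real kernels derive them from the op's quantization params.
inline void ExampleInt8SoftmaxUsage() {
  static const int kScaledDiffIntegerBits = 5;  // Same value as used above.
  const double input_scale = 1.0 / 256.0;       // Hypothetical input scale.
  const double beta = 1.0;

  SoftmaxParams params;
  int input_left_shift;
  PreprocessSoftmaxScaling(beta, input_scale, kScaledDiffIntegerBits,
                           &params.input_multiplier, &input_left_shift);
  params.input_left_shift = input_left_shift;
  params.diff_min =
      -CalculateInputRadius(kScaledDiffIntegerBits, input_left_shift);

  const RuntimeShape shape({1, 4});
  const int8_t input[4] = {-128, -1, 0, 127};
  int8_t output[4];
  Softmax<int8_t, int8_t>(params, shape, input, shape, output);
}
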
// Quantized softmax with int16_t input and int16_t output.
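// The exp() and 1/(1 + x) activation tables (params.exp_lut and
// params.one_over_one_plus_x_lut) must be populated before calling; in the
// TFLite kernels this is normally done once at Prepare time. Both tables are
// looked up with a symmetric int16_t argument in [-32768, 32767] and yield
// Q0.15 values (see generic_int16_table_lookup() in common.h).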
inline void SoftmaxInt16(const SoftmaxParams& params,
                         const RuntimeShape& input_shape,
                         const int16_t* input_data,
                         const RuntimeShape& output_shape,
                         int16_t* output_data) {
  const int trailing_dim = input_shape.DimensionsCount() - 1;
  const int outer_size =
      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
  const int depth =
      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);

  for (int i = 0; i < outer_size; ++i) {
    // Find the largest element.
    int16_t max_in_row = std::numeric_limits<int16_t>::min();
    for (int c = 0; c < depth; ++c) {
      max_in_row = std::max(max_in_row, input_data[i * depth + c]);
    }

    // Compute exp(input - max_input).
    std::vector<int16_t> exp_result_Q015(depth);
    for (int c = 0; c < depth; ++c) {
      int32_t input_diff = input_data[i * depth + c] - max_in_row;
      // Scale input_diff so that [-65535, 0] corresponds to [-10.0, 0.0].
      int32_t scaled_diff = MultiplyByQuantizedMultiplier(
          input_diff, params.input_multiplier, params.input_left_shift);
      // Recenter to [-32768, 32767].
      int32_t sym_scaled_diff = scaled_diff + 32767;
      int16_t sat_sym_scaled_diff =
          std::min(std::max(sym_scaled_diff, static_cast<int32_t>(-32768)),
                   static_cast<int32_t>(32767));
      // Apply the exp() LUT activation function.
      exp_result_Q015[c] =
          generic_int16_table_lookup(sat_sym_scaled_diff, params.exp_lut);
    }

    // sum_of_exps is in Q16.15 fixed-point format.
    int32_t sum_of_exps = 0;
    for (int c = 0; c < depth; ++c) {
      // Q16.15 + Q0.15
      sum_of_exps += exp_result_Q015[c];
    }

    // Compute the reciprocal 1 / sum_of_exps.
    uint8_t headroom_plus_one =
        CountLeadingZeros(static_cast<uint32_t>(sum_of_exps));
    int32_t shifted_sum =
        ((static_cast<int64_t>(sum_of_exps) << (headroom_plus_one - 1)) +
         (1 << 13)) >>
        14;
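    // shifted_sum now holds the normalized sum: the shift above places the
    // top set bit of sum_of_exps just below the sign bit, so after the
    // rounding shift the value lies roughly in [1 << 16, 1 << 17), i.e. it
    // encodes a number in [1.0, 2.0) with 16 fractional bits. That is why the
    // combined recentering offset below is -((1 << 15) + (1 << 16)).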
    // Since the LUT computes 1/(1 + x), we first need x = (sum - 1). The LUT
    // also expects a symmetric input, so we recenter x from [0, 65535] to
    // [-32768, 32767].
    int32_t sym_shifted_sum = shifted_sum + (-((1 << 15) + (1 << 16)));
    int16_t sat_sym_shifted_sum = static_cast<int16_t>(
        std::min(std::max(sym_shifted_sum, static_cast<int32_t>(-32768)),
                 static_cast<int32_t>(32767)));
    // Apply the 1/(1 + x) LUT activation function.
    int16_t reciprocal_scale_Q015 = generic_int16_table_lookup(
        sat_sym_shifted_sum, params.one_over_one_plus_x_lut);

    // Rescale exp_result with the reciprocal. The output range is [0, 32767],
    // corresponding to [0.0, 1.0].
    for (int c = 0; c < depth; ++c) {
      uint8_t right_shift = 31 - headroom_plus_one;
      int64_t round = 1 << (right_shift - 1);
      int32_t result = (static_cast<int64_t>(exp_result_Q015[c]) *
                            static_cast<int64_t>(reciprocal_scale_Q015) +
                        round) >>
                       right_shift;
      output_data[i * depth + c] = static_cast<int16_t>(
          std::min(std::max(result, static_cast<int32_t>(0)),
                   static_cast<int32_t>(32767)));
    }
  }
}
}  // namespace reference_ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SOFTMAX_H_