- /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- ==============================================================================*/
- #include "tensorflow/lite/kernels/kernel_util.h"
- #include <stdint.h>
- #include <stdlib.h>
- #include <algorithm>
- #include <limits>
- #include <memory>
- #include "tensorflow/lite/c/builtin_op_data.h"
- #include "tensorflow/lite/c/common.h"
- #include "tensorflow/lite/kernels/internal/cppmath.h"
- #include "tensorflow/lite/kernels/internal/quantization_util.h"
- namespace tflite {
- const TfLiteTensor* GetInput(const TfLiteContext* context,
- const TfLiteNode* node, int index) {
- if (context->tensors != nullptr) {
- return &context->tensors[node->inputs->data[index]];
- } else {
- return context->GetTensor(context, node->inputs->data[index]);
- }
- }
- TfLiteTensor* GetVariableInput(TfLiteContext* context, const TfLiteNode* node,
- int index) {
- TfLiteTensor* tensor = nullptr;
- if (context->tensors != nullptr) {
- tensor = &context->tensors[node->inputs->data[index]];
- } else {
- tensor = context->GetTensor(context, node->inputs->data[index]);
- }
- return tensor->is_variable ? tensor : nullptr;
- }
- TfLiteTensor* GetOutput(TfLiteContext* context, const TfLiteNode* node,
- int index) {
- if (context->tensors != nullptr) {
- return &context->tensors[node->outputs->data[index]];
- } else {
- return context->GetTensor(context, node->outputs->data[index]);
- }
- }
- const TfLiteTensor* GetOptionalInputTensor(const TfLiteContext* context,
- const TfLiteNode* node, int index) {
- const bool use_tensor = index < node->inputs->size &&
- node->inputs->data[index] != kTfLiteOptionalTensor;
- if (use_tensor) {
- if (context->tensors != nullptr) {
- return &context->tensors[node->inputs->data[index]];
- } else {
- return context->GetTensor(context, node->inputs->data[index]);
- }
- }
- return nullptr;
- }
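- // Usage sketch (illustrative, not part of the upstream file): a typical kernel
- // Prepare callback fetches its tensors through the accessors above and then
- // propagates the input shape to the output. The Prepare name and the single
- // input/output layout are assumptions made only for this example.
- //
- //   TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
- //     const TfLiteTensor* input = GetInput(context, node, /*index=*/0);
- //     TfLiteTensor* output = GetOutput(context, node, /*index=*/0);
- //     TF_LITE_ENSURE(context, input != nullptr);
- //     TF_LITE_ENSURE(context, output != nullptr);
- //     return context->ResizeTensor(context, output,
- //                                  TfLiteIntArrayCopy(input->dims));
- //   }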
- // Per-axis
- TfLiteStatus PopulateConvolutionQuantizationParams(
- TfLiteContext* context, const TfLiteTensor* input,
- const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output,
- const TfLiteFusedActivation& activation, int32_t* multiplier, int* shift,
- int32_t* output_activation_min, int32_t* output_activation_max,
- int32_t* per_channel_multiplier, int* per_channel_shift) {
- const auto* affine_quantization =
- reinterpret_cast<TfLiteAffineQuantization*>(filter->quantization.params);
- return PopulateConvolutionQuantizationParams(
- context, input, filter, bias, output, activation, multiplier, shift,
- output_activation_min, output_activation_max, per_channel_multiplier,
- per_channel_shift, affine_quantization->scale->size);
- }
- // Per-axis & per-tensor
- TfLiteStatus PopulateConvolutionQuantizationParams(
- TfLiteContext* context, const TfLiteTensor* input,
- const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output,
- const TfLiteFusedActivation& activation, int32_t* multiplier, int* shift,
- int32_t* output_activation_min, int32_t* output_activation_max,
- int32_t* per_channel_multiplier, int* per_channel_shift, int num_channels) {
- TF_LITE_ENSURE_EQ(context, input->quantization.type,
- kTfLiteAffineQuantization);
- TF_LITE_ENSURE_EQ(context, filter->quantization.type,
- kTfLiteAffineQuantization);
- // TODO(jianlijianli): Enable bias type check and bias scale == input scale
- // * filter scale for each channel in affine quantization once bias
- // quantization is properly populated.
- // TF_LITE_ENSURE_EQ(context, bias->quantization.type,
- // kTfLiteAffineQuantization);
- // Check data type.
- const auto* affine_quantization =
- reinterpret_cast<TfLiteAffineQuantization*>(filter->quantization.params);
- TF_LITE_ENSURE(context, affine_quantization);
- TF_LITE_ENSURE(context, affine_quantization->scale);
- const bool is_per_channel = affine_quantization->scale->size > 1;
- if (is_per_channel) {
- // Currently only Int8/Int16 inputs are supported for per-channel quantization.
- TF_LITE_ENSURE(context,
- input->type == kTfLiteInt8 || input->type == kTfLiteInt16);
- TF_LITE_ENSURE_EQ(context, filter->type, kTfLiteInt8);
- TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size, num_channels);
- TF_LITE_ENSURE_EQ(
- context, num_channels,
- filter->dims->data[affine_quantization->quantized_dimension]);
- }
- // Populate multiplier and shift using affine quantization.
- const float input_scale = input->params.scale;
- const float output_scale = output->params.scale;
- const float* filter_scales = affine_quantization->scale->data;
- for (int i = 0; i < num_channels; ++i) {
- // If a per-tensor quantization parameter is specified, broadcast it along the
- // quantization dimension (channels_out).
- const float scale = is_per_channel ? filter_scales[i] : filter_scales[0];
- const double filter_scale = static_cast<double>(scale);
- const double effective_output_scale = static_cast<double>(input_scale) *
- filter_scale /
- static_cast<double>(output_scale);
- int32_t significand;
- int channel_shift;
- QuantizeMultiplier(effective_output_scale, &significand, &channel_shift);
- per_channel_multiplier[i] = significand;
- per_channel_shift[i] = channel_shift;
- }
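- // Worked example (illustrative numbers): with input_scale = 0.5, filter_scale =
- // 0.03125 and output_scale = 4.0, effective_output_scale = 0.5 * 0.03125 / 4.0
- // = 0.00390625 = 2^-8. QuantizeMultiplier() then yields a Q31 significand of
- // 1073741824 (i.e. 0.5) and a shift of -7, since 0.5 * 2^-7 = 2^-8.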
- // Populate scalar quantization parameters.
- // This check on legacy quantization parameters is kept only for backward
- // compatibility.
- if (input->type == kTfLiteUInt8) {
- // Check bias scale == input scale * filter scale.
- double real_multiplier = 0.0;
- TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler(
- context, input, filter, bias, output, &real_multiplier));
- int exponent;
- // Populate quantization parameters with multiplier and shift.
- QuantizeMultiplier(real_multiplier, multiplier, &exponent);
- *shift = -exponent;
- }
- if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8 ||
- input->type == kTfLiteInt16) {
- TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
- context, activation, output, output_activation_min,
- output_activation_max));
- }
- return kTfLiteOk;
- }
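- // Usage sketch (illustrative, not part of the upstream file): a per-channel
- // quantized convolution kernel would typically call the function above from its
- // Prepare step. PreparePerChannelConv is a hypothetical name, and real kernels
- // keep the computed parameters in persistent op data rather than in locals;
- // std::vector is used here only for brevity.
- //
- //   TfLiteStatus PreparePerChannelConv(TfLiteContext* context,
- //                                      TfLiteNode* node,
- //                                      TfLiteConvParams* params) {
- //     const TfLiteTensor* input = GetInput(context, node, 0);
- //     const TfLiteTensor* filter = GetInput(context, node, 1);
- //     const TfLiteTensor* bias = GetOptionalInputTensor(context, node, 2);
- //     TfLiteTensor* output = GetOutput(context, node, 0);
- //     const int num_channels = filter->dims->data[0];
- //     std::vector<int32_t> per_channel_multiplier(num_channels);
- //     std::vector<int> per_channel_shift(num_channels);
- //     int32_t multiplier, act_min, act_max;
- //     int shift;
- //     return PopulateConvolutionQuantizationParams(
- //         context, input, filter, bias, output, params->activation,
- //         &multiplier, &shift, &act_min, &act_max,
- //         per_channel_multiplier.data(), per_channel_shift.data(),
- //         num_channels);
- //   }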
- TfLiteStatus GetQuantizedConvolutionMultipler(TfLiteContext* context,
- const TfLiteTensor* input,
- const TfLiteTensor* filter,
- const TfLiteTensor* bias,
- TfLiteTensor* output,
- double* multiplier) {
- const double input_product_scale = static_cast<double>(input->params.scale) *
- static_cast<double>(filter->params.scale);
- // TODO(ahentz): The following conditions must be guaranteed by the training
- // pipeline.
- if (bias) {
- const double bias_scale = static_cast<double>(bias->params.scale);
- // Here we make sure input_product_scale and bias_scale are roughly equal.
- // The dequantized output satisfies:
- // (output - output_zp) * output_scale =
- // input_product * input_product_scale + bias * bias_scale ---- (0)
- //
- // (0) can be rewritten as:
- // (input_product + bias) * input_product_scale ----- (1)
- // +
- // bias * (bias_scale - input_product_scale) ------ (2)
- //
- // The real kernel computes (1), so (2) must have negligible impact on the
- // output; i.e. bias * (bias_scale - input_product_scale) / output_scale
- // should be much smaller than one integer step of the output.
- // Since bias values are normally confined to a small range, it is enough to
- // require (bias_scale - input_product_scale) / output_scale to be a small
- // number such as 0.02.
- const double scale_diff = std::abs(input_product_scale - bias_scale);
- const double output_scale = static_cast<double>(output->params.scale);
- TF_LITE_ENSURE(context, scale_diff / output_scale <= 0.02);
- }
- return GetQuantizedConvolutionMultipler(context, input, filter, output,
- multiplier);
- }
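- // Worked example (illustrative numbers): with input_scale = 0.5, filter_scale =
- // 0.02 and bias_scale = 0.0102, input_product_scale = 0.01 and scale_diff =
- // 0.0002. With output_scale = 0.05 the check above evaluates 0.0002 / 0.05 =
- // 0.004 <= 0.02, so this bias scale is accepted as close enough to
- // input_scale * filter_scale.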
- TfLiteStatus GetQuantizedConvolutionMultipler(TfLiteContext* context,
- const TfLiteTensor* input,
- const TfLiteTensor* filter,
- TfLiteTensor* output,
- double* multiplier) {
- const double input_product_scale = static_cast<double>(input->params.scale) *
- static_cast<double>(filter->params.scale);
- TF_LITE_ENSURE(context, input_product_scale >= 0);
- *multiplier = input_product_scale / static_cast<double>(output->params.scale);
- return kTfLiteOk;
- }
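- // Worked example (illustrative numbers): continuing the example above, the
- // returned multiplier is input_product_scale / output_scale = 0.01 / 0.05 =
- // 0.2; callers typically feed this value to QuantizeMultiplier() to obtain the
- // fixed-point multiplier/shift pair used by the integer kernels.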
- namespace {
- void CalculateActivationRangeQuantizedImpl(TfLiteFusedActivation activation,
- int32_t qmin, int32_t qmax,
- TfLiteTensor* output,
- int32_t* act_min, int32_t* act_max) {
- const auto scale = output->params.scale;
- const auto zero_point = output->params.zero_point;
- auto quantize = [scale, zero_point](float f) {
- return zero_point + static_cast<int32_t>(TfLiteRound(f / scale));
- };
- if (activation == kTfLiteActRelu) {
- *act_min = std::max(qmin, quantize(0.0));
- *act_max = qmax;
- } else if (activation == kTfLiteActRelu6) {
- *act_min = std::max(qmin, quantize(0.0));
- *act_max = std::min(qmax, quantize(6.0));
- } else if (activation == kTfLiteActReluN1To1) {
- *act_min = std::max(qmin, quantize(-1.0));
- *act_max = std::min(qmax, quantize(1.0));
- } else {
- *act_min = qmin;
- *act_max = qmax;
- }
- }
- } // namespace
- TfLiteStatus CalculateActivationRangeQuantized(TfLiteContext* context,
- TfLiteFusedActivation activation,
- TfLiteTensor* output,
- int32_t* act_min,
- int32_t* act_max) {
- int32_t qmin = 0;
- int32_t qmax = 0;
- if (output->type == kTfLiteUInt8) {
- qmin = std::numeric_limits<uint8_t>::min();
- qmax = std::numeric_limits<uint8_t>::max();
- } else if (output->type == kTfLiteInt8) {
- qmin = std::numeric_limits<int8_t>::min();
- qmax = std::numeric_limits<int8_t>::max();
- } else if (output->type == kTfLiteInt16) {
- qmin = std::numeric_limits<int16_t>::min();
- qmax = std::numeric_limits<int16_t>::max();
- } else {
- TF_LITE_ENSURE(context, false);
- }
- CalculateActivationRangeQuantizedImpl(activation, qmin, qmax, output, act_min,
- act_max);
- return kTfLiteOk;
- }
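- // Worked example (illustrative numbers): for a uint8 output with scale = 0.05
- // and zero_point = 128, kTfLiteActRelu6 yields
- // act_min = max(0, 128 + round(0 / 0.05)) = 128 and
- // act_max = min(255, 128 + round(6 / 0.05)) = 248,
- // so the integer kernel clamps its rescaled output to [128, 248].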
- bool HaveSameShapes(const TfLiteTensor* input1, const TfLiteTensor* input2) {
- return TfLiteIntArrayEqual(input1->dims, input2->dims);
- }
- // TODO(petewarden): Having macros around this is ugly, look at other strategies
- // before replicating this approach elsewhere.
- #ifndef TF_LITE_STATIC_MEMORY
- TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context,
- const TfLiteTensor* input1,
- const TfLiteTensor* input2,
- TfLiteIntArray** output_shape) {
- int dims1 = NumDimensions(input1);
- int dims2 = NumDimensions(input2);
- int out_dims = std::max(dims1, dims2);
- if (NumElements(input1) == 0) {
- *output_shape = TfLiteIntArrayCopy(input1->dims);
- return kTfLiteOk;
- }
- std::unique_ptr<TfLiteIntArray, void (*)(TfLiteIntArray*)> shape(
- TfLiteIntArrayCreate(out_dims), TfLiteIntArrayFree);
- for (int i = 0; i < out_dims; ++i) {
- int d1 = i >= dims1 ? 1 : SizeOfDimension(input1, dims1 - i - 1);
- int d2 = i >= dims2 ? 1 : SizeOfDimension(input2, dims2 - i - 1);
- TF_LITE_ENSURE(context, d1 == d2 || d1 == 1 || d2 == 1);
- shape->data[out_dims - i - 1] = std::max(d1, d2);
- }
- *output_shape = shape.release();
- return kTfLiteOk;
- }
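- // Worked example (illustrative): broadcasting shapes [3, 1, 5] and [4, 1] walks
- // the dimensions from right to left, taking the larger extent whenever one side
- // is 1, and produces [3, 4, 5]. Shapes such as [3, 2] and [3, 4] fail the
- // TF_LITE_ENSURE above because 2 and 4 are neither equal nor 1.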
- TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context,
- const TfLiteTensor* input1,
- const TfLiteTensor* input2,
- const TfLiteTensor* input3,
- TfLiteIntArray** output_shape) {
- int dims1 = NumDimensions(input1);
- int dims2 = NumDimensions(input2);
- int dims3 = NumDimensions(input3);
- int out_dims = std::max(std::max(dims1, dims2), dims3);
- std::unique_ptr<TfLiteIntArray, void (*)(TfLiteIntArray*)> shape(
- TfLiteIntArrayCreate(out_dims), TfLiteIntArrayFree);
- for (int i = 0; i < out_dims; ++i) {
- int d1 = i >= dims1 ? 1 : SizeOfDimension(input1, dims1 - i - 1);
- int d2 = i >= dims2 ? 1 : SizeOfDimension(input2, dims2 - i - 1);
- int d3 = i >= dims3 ? 1 : SizeOfDimension(input3, dims3 - i - 1);
- int max_value = std::max(std::max(d1, d2), d3);
- TF_LITE_ENSURE(context, d1 == 1 || d1 == max_value);
- TF_LITE_ENSURE(context, d2 == 1 || d2 == max_value);
- TF_LITE_ENSURE(context, d3 == 1 || d3 == max_value);
- shape->data[out_dims - i - 1] = max_value;
- }
- *output_shape = shape.release();
- return kTfLiteOk;
- }
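- // Worked example (illustrative): for three inputs with shapes [2, 1, 1],
- // [1, 3, 1] and [4], every dimension is either 1 or equal to the per-dimension
- // maximum, so the broadcast output shape is [2, 3, 4].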
- #endif // TF_LITE_STATIC_MEMORY
- } // namespace tflite