
Deleted redundant files

QingChuanWS 5 years ago
parent commit 89da01cb2b
100 changed files with 14860 additions and 0 deletions
  1. + 16 - 0  tensorflow/lite/micro/tensorflow/SConscript
  2. + 139 - 0  tensorflow/lite/micro/tensorflow/core/public/version.h
  3. + 16 - 0  tensorflow/lite/micro/tensorflow/lite/SConscript
  4. + 29 - 0  tensorflow/lite/micro/tensorflow/lite/c/SConscript
  5. + 472 - 0  tensorflow/lite/micro/tensorflow/lite/c/builtin_op_data.h
  6. + 232 - 0  tensorflow/lite/micro/tensorflow/lite/c/common.c
  7. + 936 - 0  tensorflow/lite/micro/tensorflow/lite/c/common.h
  8. + 16 - 0  tensorflow/lite/micro/tensorflow/lite/core/SConscript
  9. + 29 - 0  tensorflow/lite/micro/tensorflow/lite/core/api/SConscript
  10. + 38 - 0  tensorflow/lite/micro/tensorflow/lite/core/api/error_reporter.cc
  11. + 59 - 0  tensorflow/lite/micro/tensorflow/lite/core/api/error_reporter.h
  12. + 1739 - 0  tensorflow/lite/micro/tensorflow/lite/core/api/flatbuffer_conversions.cc
  13. + 253 - 0  tensorflow/lite/micro/tensorflow/lite/core/api/flatbuffer_conversions.h
  14. + 66 - 0  tensorflow/lite/micro/tensorflow/lite/core/api/op_resolver.cc
  15. + 48 - 0  tensorflow/lite/micro/tensorflow/lite/core/api/op_resolver.h
  16. + 194 - 0  tensorflow/lite/micro/tensorflow/lite/core/api/profiler.h
  17. + 50 - 0  tensorflow/lite/micro/tensorflow/lite/core/api/tensor_utils.cc
  18. + 28 - 0  tensorflow/lite/micro/tensorflow/lite/core/api/tensor_utils.h
  19. + 16 - 0  tensorflow/lite/micro/tensorflow/lite/experimental/SConscript
  20. + 16 - 0  tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/SConscript
  21. + 28 - 0  tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/SConscript
  22. + 102 - 0  tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/bits.h
  23. + 54 - 0  tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/fft.cc
  24. + 50 - 0  tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/fft.h
  25. + 72 - 0  tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/fft_util.cc
  26. + 34 - 0  tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/fft_util.h
  27. + 134 - 0  tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/filterbank.c
  28. + 63 - 0  tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/filterbank.h
  29. + 220 - 0  tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/filterbank_util.c
  30. + 50 - 0  tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/filterbank_util.h
  31. + 72 - 0  tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/frontend.c
  32. + 64 - 0  tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/frontend.h
  33. + 85 - 0  tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/frontend_util.c
  34. + 52 - 0  tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/frontend_util.h
  35. + 30 - 0  tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/log_lut.c
  36. + 40 - 0  tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/log_lut.h
  37. + 83 - 0  tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/log_scale.c
  38. + 39 - 0  tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/log_scale.h
  39. + 27 - 0  tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/log_scale_util.c
  40. + 45 - 0  tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/log_scale_util.h
  41. + 51 - 0  tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/noise_reduction.c
  42. + 46 - 0  tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/noise_reduction.h
  43. + 45 - 0  tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/noise_reduction_util.c
  44. + 50 - 0  tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/noise_reduction_util.h
  45. + 56 - 0  tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.c
  46. + 47 - 0  tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.h
  47. + 92 - 0  tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_util.c
  48. + 57 - 0  tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_util.h
  49. + 70 - 0  tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/window.c
  50. + 49 - 0  tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/window.h
  51. + 73 - 0  tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/window_util.c
  52. + 45 - 0  tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/window_util.h
  53. + 29 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/SConscript
  54. + 956 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/common.h
  55. + 112 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/compatibility.h
  56. + 40 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/cppmath.h
  57. + 35 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/max.h
  58. + 35 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/min.h
  59. + 40 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/optimized/neon_check.h
  60. + 395 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/quantization_util.cc
  61. + 292 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/quantization_util.h
  62. + 454 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/add.h
  63. + 68 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/arg_min_max.h
  64. + 84 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/binary_function.h
  65. + 37 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/ceil.h
  66. + 334 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/comparisons.h
  67. + 140 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/concatenation.h
  68. + 262 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/conv.h
  69. + 100 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h
  70. + 297 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h
  71. + 78 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/dequantize.h
  72. + 39 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/floor.h
  73. + 320 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/fully_connected.h
  74. + 166 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/hard_swish.h
  75. + 145 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/integer_ops/add.h
  76. + 217 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h
  77. + 289 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h
  78. + 108 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h
  79. + 65 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/integer_ops/l2normalization.h
  80. + 99 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h
  81. + 131 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h
  82. + 258 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h
  83. + 106 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h
  84. + 90 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/l2normalization.h
  85. + 132 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/logistic.h
  86. + 64 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/maximum_minimum.h
  87. + 166 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/mul.h
  88. + 37 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/neg.h
  89. + 162 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/pad.h
  90. + 297 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/pooling.h
  91. + 109 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/prelu.h
  92. + 138 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h
  93. + 55 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/quantize.h
  94. + 405 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/reduce.h
  95. + 67 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/requantize.h
  96. + 101 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h
  97. + 51 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/round.h
  98. + 228 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/softmax.h
  99. + 94 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/strided_slice.h
  100. + 516 - 0  tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/sub.h

+ 16 - 0
tensorflow/lite/micro/tensorflow/SConscript

@@ -0,0 +1,16 @@
+# RT-Thread building script for bridge
+
+import os
+from building import *
+
+cwd = GetCurrentDir()
+objs = []
+list = os.listdir(cwd)
+
+if GetDepend('PKG_USING_TENSORFLOWLITEMICRO'):
+    for d in list:
+        path = os.path.join(cwd, d)
+        if os.path.isfile(os.path.join(path, 'SConscript')):
+            objs = objs + SConscript(os.path.join(d, 'SConscript'))
+
+Return('objs')

+ 139 - 0
tensorflow/lite/micro/tensorflow/core/public/version.h

@@ -0,0 +1,139 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_PUBLIC_VERSION_H_
+#define TENSORFLOW_CORE_PUBLIC_VERSION_H_
+
+// TensorFlow uses semantic versioning, see http://semver.org/.
+
+// Also update tensorflow/tensorflow.bzl and
+// tensorflow/tools/pip_package/setup.py
+#define TF_MAJOR_VERSION 2
+#define TF_MINOR_VERSION 4
+#define TF_PATCH_VERSION 0
+
+// TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1",
+// "-beta", "-rc", "-rc.1")
+#define TF_VERSION_SUFFIX ""
+
+#define TF_STR_HELPER(x) #x
+#define TF_STR(x) TF_STR_HELPER(x)
+
+// e.g. "0.5.0" or "0.6.0-alpha".
+#define TF_VERSION_STRING                                            \
+  (TF_STR(TF_MAJOR_VERSION) "." TF_STR(TF_MINOR_VERSION) "." TF_STR( \
+      TF_PATCH_VERSION) TF_VERSION_SUFFIX)
+
+// GraphDef compatibility versions (the versions field in graph.proto).
+//
+// Each graph has producer and min_consumer versions, and each
+// consumer has its own version and a min_producer.  In addition, graphs can
+// mark specific consumer versions as bad (to prevent bugs from executing).
+// A consumer will execute a graph if the consumer's version is at least the
+// graph's min_consumer, the graph's producer version is at least the consumer's
+// min_producer, and the consumer version isn't specifically disallowed by the
+// graph.
+//
+// By default, newly created graphs have producer version TF_GRAPH_DEF_VERSION
+// min_consumer TF_GRAPH_DEF_MIN_CONSUMER, and no other bad consumer versions.
+//
+// Version history:
+//
+// 0. Graphs created before GraphDef versioning
+// 1. First real version (2dec2015)
+// 2. adjust_contrast only takes float, doesn't perform clamping (11dec2015)
+// 3. Remove TileGrad, since it was equivalent to reduce_sum (30dec2015)
+// 4. When support for this version is removed, we can safely make AttrValue
+//    parsing more strict with respect to empty list values (see
+//    111635679, 7jan2016).
+// 5. Graphs are wholly-validated during Session::Create() (7jan2016).
+// 6. TensorFlow is scalar strict within Google (27jan2016).
+// 7. Remove TopK in favor of TopKV2 (5feb2016).
+// 8. Replace RandomCrop from C++ with pure Python (5feb2016).
+// 9. Deprecate batch_norm_with_global_normalization (16feb2016).
+// 10. Deprecate conv3d_backprop_{filter,input} (10jun2016).
+// 11. Deprecate {batch}_self_adjoint_eig (3aug2016).
+// 12. Graph consumers understand the node_def field of FunctionDef (22aug2016).
+// 13. Deprecate multiple batch linear algebra ops (9sep2016).
+// 14. Deprecate batch_matrix_* ops. (10sep2016).
+// 15. Deprecate batch_fft_* ops. (14sep2016).
+// 16. Deprecate tensor_array (v1) ops in favor of v2 (10nov2016).
+// 17. Deprecate inv (11nov2016).
+// 17. Expose reverse_v2 (10nov2016)
+// 18. Add VariableV2 (30nov2016)
+// 19. Deprecated ops created by models moved out of core SkipGram, NegTrain.
+//     (08dec2016)
+// 20. Catch all version 1.0 changes to Python API generation. SplitV is now
+//     used for tf.split, ReverseV2 is now used by tf.reverse, ConcatV2 is
+//     now used by tf.concat. Graphs use flooring
+//     division and mod semantics. TensorArrayV3. (12dec2016)
+//     Also considered the version for when it is required for reduction
+//     ops' indices to be scalar or vector, and not higher rank.
+//     Some earlier graph def versions allowed this.
+// 21. Dropped FunctionDef.Node support, switched to node_def introduced
+//     in version 12. (11jan2017)
+// 22. Placeholder now can specify and enforce scalar and partial
+//     shapes, particularly when restoring a graph from GraphDef
+//     produced at version 22 or later.  (04/10/2016)
+// 23. Remove NonMaxSuppression in favor of NonMaxSuppressionV2.
+// 24. Deprecate lookup ops (v1) ops in favor of v2 (30may2017)
+// 25. Deprecate stack (v1) ops in favor of v2 (2017/6/15).
+// 25. Deprecate RandomPoisson (v1) ops in favor of v2 (2017/10/25).
+// 26. Add a bool 'stripped_default_attrs' to MetaInfoDef indicating
+//     whether default-valued attrs have been stripped from the nodes in the
+//     GraphDef. (7dec2017)
+// 27. Deprecate TensorArray ops v2 in favor of v3 and deprecated io_ops
+//     deprecated in favor of V2 ops. (2018/01/23)
+// 28. Deprecate MatrixExponential op in favor of Python implementation.
+//     (2018/08/21).
+// (2019/02/15). Added `control_ret` field to FunctionDef proto, and
+//     `control_output` field to OpDef proto.
+// 29. Deprecate StatefulStandardNormal op in favor of StatefulStandardNormalV2.
+//     (2019/03/25).
+// (2019/04/17). Added `arg_attr` field to FunctionDefProto.
+// 30. (2019/05/09) First date based GraphDef version. GraphDef
+//     versions advance by 1 each day after this point.
+
+#define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0
+#define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0
+#define TF_GRAPH_DEF_VERSION 485  // Updated: 2020/8/6
+
+// Checkpoint compatibility versions (the versions field in SavedSliceMeta).
+//
+// The checkpoint versions have the same semantics as GraphDef versions, but the
+// numbering scheme is separate.  We have no plans to ever deprecate checkpoint
+// versions, but it's good to have this in place in case we ever need to.
+//
+// Version history:
+//
+// 0. Checkpoints saved before checkpoint versioning.
+// 1. First real version (10feb2015).
+#define TF_CHECKPOINT_VERSION_MIN_PRODUCER 0
+#define TF_CHECKPOINT_VERSION_MIN_CONSUMER 0
+#define TF_CHECKPOINT_VERSION 1
+
+/// Version query functions (defined in generated version_info.cc)
+
+// Host compiler version (declared elsewhere to be __VERSION__)
+extern const char* tf_compiler_version();
+// The git commit designator when tensorflow was built
+// If no git repository, this will be "internal".
+extern const char* tf_git_version();
+// Value of the _GLIBCXX_USE_CXX11_ABI flag, or 0 if it's not set.
+extern int tf_cxx11_abi_flag();
+// Returns 1 if build is monolithic, or 0 otherwise.
+extern int tf_monolithic_build();
+
+#endif  // TENSORFLOW_CORE_PUBLIC_VERSION_H_
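
As a quick check of how the stringizing macros above compose, here is a minimal editorial sketch (not part of the commit); with the values defined in this header it prints 2.4.0:

#include <stdio.h>
#include "tensorflow/core/public/version.h"

int main(void) {
  /* TF_STR(TF_MAJOR_VERSION) expands to the literal "2", so the concatenated
     TF_VERSION_STRING is "2.4.0" plus TF_VERSION_SUFFIX (empty here). */
  printf("%s\n", TF_VERSION_STRING);
  return 0;
}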

+ 16 - 0
tensorflow/lite/micro/tensorflow/lite/SConscript

@@ -0,0 +1,16 @@
+# RT-Thread building script for bridge
+
+import os
+from building import *
+
+cwd = GetCurrentDir()
+objs = []
+list = os.listdir(cwd)
+
+if GetDepend('PKG_USING_TENSORFLOWLITEMICRO'):
+    for d in list:
+        path = os.path.join(cwd, d)
+        if os.path.isfile(os.path.join(path, 'SConscript')):
+            objs = objs + SConscript(os.path.join(d, 'SConscript'))
+
+Return('objs')

+ 29 - 0
tensorflow/lite/micro/tensorflow/lite/c/SConscript

@@ -0,0 +1,29 @@
+from building import *
+import os
+
+cwd     = GetCurrentDir()
+src     = Glob('*.c') + Glob('*.cc')
+
+# Locate the TensorflowLiteMicro package directory under the project's packages/ folder.
+root =  str(Dir('#'))
+packages = os.path.join(root, 'packages')
+file_list = os.listdir(packages)
+for f in file_list:
+    if(f.split('-')[0] == 'TensorflowLiteMicro'):
+        tflm_pkg = os.path.join(packages, f)
+        break
+#./third_party/flatbuffers/include
+flatbuffer = os.path.join(tflm_pkg, "third_party/flatbuffers/include")
+#./third_party/gemmlowp
+gemmlowp = os.path.join(tflm_pkg, "third_party/gemmlowp")
+#./third_party/kissfft
+kissfft = os.path.join(tflm_pkg, "third_party/kissfft")
+#./third_party/ruy
+ruy = os.path.join(tflm_pkg, "third_party/ruy")
+
+
+CPPPATH = [tflm_pkg, flatbuffer, gemmlowp, kissfft, ruy]
+
+group = DefineGroup('lite', src, depend = ['PKG_USING_TENSORFLOWLITEMICRO'], CPPPATH = CPPPATH)
+
+Return('group')

+ 472 - 0
tensorflow/lite/micro/tensorflow/lite/c/builtin_op_data.h

@@ -0,0 +1,472 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_
+#define TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_
+
+#include <stdint.h>
+
+#include "tensorflow/lite/c/common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif  // __cplusplus
+
+// TfLiteReshapeParams can't have dynamic data so we fix the maximum possible
+// number of dimensions.
+#define TFLITE_RESHAPE_PARAMS_MAX_DIMENSION_COUNT 8
+
+// TODO(aselle): Consider using "if this then that" for testing.
+
+// Useful placeholder to put in otherwise empty structs to avoid size warnings.
+typedef struct {
+  char dummy;
+} EmptyStructPlaceholder;
+
+// IMPORTANT: All new members of structs must be added at the end to ensure
+// backwards compatibility.
+
+// Possible padding types (for convolutions)
+typedef enum {
+  kTfLitePaddingUnknown = 0,
+  kTfLitePaddingSame,
+  kTfLitePaddingValid,
+} TfLitePadding;
+
+typedef enum {
+  kTfLiteMirrorPaddingUnknown = 0,
+  kTfLiteMirrorPaddingReflect,
+  kTfLiteMirrorPaddingSymmetric,
+} TfLiteMirrorPaddingMode;
+
+// TODO(b/130259536): We should move this out of builtin_op_data.
+typedef struct {
+  int width;
+  int height;
+  int width_offset;
+  int height_offset;
+} TfLitePaddingValues;
+
+typedef struct {
+  TfLiteMirrorPaddingMode mode;
+} TfLiteMirrorPaddingParams;
+
+// Possible fused activation functions.
+// TODO(aselle): rename to TfLiteActivation
+typedef enum {
+  kTfLiteActNone = 0,
+  kTfLiteActRelu,
+  kTfLiteActReluN1To1,                    // min(max(-1, x), 1)
+  kTfLiteActRelu1 = kTfLiteActReluN1To1,  // kTfLiteActRelu1 will be deprecated.
+  kTfLiteActRelu6,                        // min(max(0, x), 6)
+  kTfLiteActTanh,
+  kTfLiteActSignBit,
+  kTfLiteActSigmoid,
+} TfLiteFusedActivation;
+
+typedef struct {
+  // Parameters for CONV_2D version 1.
+  TfLitePadding padding;
+  int stride_width;
+  int stride_height;
+  TfLiteFusedActivation activation;
+
+  // Parameters for CONV_2D version 2.
+  // Note: Version 2 supports dilation values not equal to 1.
+  int dilation_width_factor;
+  int dilation_height_factor;
+} TfLiteConvParams;
+
+typedef struct {
+  TfLitePadding padding;
+  int stride_width;
+  int stride_height;
+  int filter_width;
+  int filter_height;
+  TfLiteFusedActivation activation;
+  struct {
+    TfLitePaddingValues padding;
+  } computed;
+} TfLitePoolParams;
+
+typedef struct {
+  // Parameters for DepthwiseConv version 1 or above.
+  TfLitePadding padding;
+  int stride_width;
+  int stride_height;
+  // `depth_multiplier` is redundant. It's used by CPU kernels in
+  // TensorFlow 2.0 or below, but ignored in versions above.
+  //
+  // The information can be deduced from the shape of input and the shape of
+  // weights. Since the TFLiteConverter toolchain doesn't support partially
+  // specified shapes, relying on `depth_multiplier` stops us from supporting
+  // graphs with dynamic shape tensors.
+  //
+  // Note: Some of the delegates (e.g. NNAPI, GPU) are still relying on this
+  // field.
+  int depth_multiplier;
+  TfLiteFusedActivation activation;
+  // Parameters for DepthwiseConv version 2 or above.
+  int dilation_width_factor;
+  int dilation_height_factor;
+} TfLiteDepthwiseConvParams;
+
+typedef struct {
+  int rank;
+  TfLiteFusedActivation activation;
+
+  // Parameter for SVDF version 4.
+  bool asymmetric_quantize_inputs;
+} TfLiteSVDFParams;
+
+typedef struct {
+  TfLiteFusedActivation activation;
+
+  // Parameter for RNN version 3.
+  bool asymmetric_quantize_inputs;
+} TfLiteRNNParams;
+
+typedef struct {
+  bool time_major;
+  TfLiteFusedActivation activation;
+
+  // Parameter for Sequence RNN version 3.
+  bool asymmetric_quantize_inputs;
+} TfLiteSequenceRNNParams;
+
+typedef struct {
+  bool time_major;
+  TfLiteFusedActivation activation;
+  bool merge_outputs;
+
+  // Parameter for Bidirectional RNN version 3.
+  bool asymmetric_quantize_inputs;
+} TfLiteBidirectionalSequenceRNNParams;
+
+typedef enum {
+  kTfLiteFullyConnectedWeightsFormatDefault = 0,
+  kTfLiteFullyConnectedWeightsFormatShuffled4x16Int8 = 1,
+} TfLiteFullyConnectedWeightsFormat;
+
+typedef struct {
+  // Parameters for FullyConnected version 1 or above.
+  TfLiteFusedActivation activation;
+
+  // Parameters for FullyConnected version 2 or above.
+  TfLiteFullyConnectedWeightsFormat weights_format;
+
+  // Parameters for FullyConnected version 5 or above.
+  // If set to true, then the number of dimensions in the input and the output
+  // tensors are the same. Furthermore, all but the last dimension of the input
+  // and output shapes will be equal.
+  bool keep_num_dims;
+
+  // Parameters for FullyConnected version 7 or above.
+  // If set to true and the weights are quantized, then non constant inputs
+  // are quantized at evaluation time with asymmetric quantization.
+  bool asymmetric_quantize_inputs;
+} TfLiteFullyConnectedParams;
+
+typedef enum {
+  kTfLiteLshProjectionUnknown = 0,
+  kTfLiteLshProjectionSparse = 1,
+  kTfLiteLshProjectionDense = 2,
+} TfLiteLSHProjectionType;
+
+typedef struct {
+  TfLiteLSHProjectionType type;
+} TfLiteLSHProjectionParams;
+
+typedef struct {
+  float beta;
+} TfLiteSoftmaxParams;
+
+typedef struct {
+  int axis;
+  TfLiteFusedActivation activation;
+} TfLiteConcatenationParams;
+
+typedef struct {
+  TfLiteFusedActivation activation;
+  // Parameter added for the version 4.
+  bool pot_scale_int16;
+} TfLiteAddParams;
+
+typedef struct {
+  EmptyStructPlaceholder placeholder;
+} TfLiteSpaceToBatchNDParams;
+
+typedef struct {
+  EmptyStructPlaceholder placeholder;
+} TfLiteBatchToSpaceNDParams;
+
+typedef struct {
+  bool adj_x;
+  bool adj_y;
+} TfLiteBatchMatMulParams;
+
+typedef struct {
+  TfLiteFusedActivation activation;
+} TfLiteMulParams;
+
+typedef struct {
+  TfLiteFusedActivation activation;
+  // Parameter added for the version 5.
+  bool pot_scale_int16;
+} TfLiteSubParams;
+
+typedef struct {
+  TfLiteFusedActivation activation;
+} TfLiteDivParams;
+
+typedef struct {
+  TfLiteFusedActivation activation;
+} TfLiteL2NormParams;
+
+typedef struct {
+  int radius;
+  float bias;
+  float alpha;
+  float beta;
+} TfLiteLocalResponseNormParams;
+
+typedef enum {
+  kTfLiteLSTMFullKernel = 0,
+  kTfLiteLSTMBasicKernel
+} TfLiteLSTMKernelType;
+
+typedef struct {
+  // Parameters for LSTM version 1.
+  TfLiteFusedActivation activation;
+  float cell_clip;
+  float proj_clip;
+
+  // Parameters for LSTM version 2.
+  // kTfLiteLSTMBasicKernel is only supported in version 2 or above.
+  TfLiteLSTMKernelType kernel_type;
+
+  // Parameters for LSTM version 4.
+  bool asymmetric_quantize_inputs;
+} TfLiteLSTMParams;
+
+typedef struct {
+  // Parameters needed for the underlying LSTM.
+  TfLiteFusedActivation activation;
+  float cell_clip;
+  float proj_clip;
+
+  // If set to true then the first dimension is time, otherwise batch.
+  bool time_major;
+
+  // Parameter for unidirectional sequence RNN version 3.
+  bool asymmetric_quantize_inputs;
+} TfLiteUnidirectionalSequenceLSTMParams;
+
+typedef struct {
+  // Parameters supported by version 1:
+  // Parameters inherited for the LSTM kernel.
+  TfLiteFusedActivation activation;
+  float cell_clip;
+  float proj_clip;
+
+  // If true, store the outputs of both directions in the first output.
+  bool merge_outputs;
+
+  // Parameters supported by version 2:
+  // If set to true then the first dimension is time, otherwise batch.
+  bool time_major;
+
+  // Parameters supported by version 4:
+  // If set to true, then hybrid ops use asymmetric quantization for inputs.
+  bool asymmetric_quantize_inputs;
+} TfLiteBidirectionalSequenceLSTMParams;
+
+typedef struct {
+  bool align_corners;
+  // half_pixel_centers assumes pixels are of half the actual dimensions, and
+  // yields more accurate resizes. Corresponds to the same argument for the
+  // original TensorFlow op in TF2.0.
+  bool half_pixel_centers;
+} TfLiteResizeBilinearParams;
+
+typedef struct {
+  bool align_corners;
+  bool half_pixel_centers;
+} TfLiteResizeNearestNeighborParams;
+
+typedef struct {
+  EmptyStructPlaceholder placeholder;
+} TfLitePadParams;
+
+typedef struct {
+  EmptyStructPlaceholder placeholder;
+} TfLitePadV2Params;
+
+typedef struct {
+  // TODO(ahentz): We can't have dynamic data in this struct, at least not yet.
+  // For now we will fix the maximum possible number of dimensions.
+  int shape[TFLITE_RESHAPE_PARAMS_MAX_DIMENSION_COUNT];
+  int num_dimensions;
+} TfLiteReshapeParams;
+
+typedef struct {
+  int ngram_size;
+  int max_skip_size;
+  bool include_all_ngrams;
+} TfLiteSkipGramParams;
+
+typedef struct {
+  int block_size;
+} TfLiteSpaceToDepthParams;
+
+typedef struct {
+  int block_size;
+} TfLiteDepthToSpaceParams;
+
+typedef struct {
+  TfLiteType in_data_type;
+  TfLiteType out_data_type;
+} TfLiteCastParams;
+
+typedef enum {
+  kTfLiteCombinerTypeSum = 0,
+  kTfLiteCombinerTypeMean = 1,
+  kTfLiteCombinerTypeSqrtn = 2,
+} TfLiteCombinerType;
+
+typedef struct {
+  TfLiteCombinerType combiner;
+} TfLiteEmbeddingLookupSparseParams;
+
+typedef struct {
+  int axis;
+} TfLiteGatherParams;
+
+typedef struct {
+  EmptyStructPlaceholder placeholder;
+} TfLiteTransposeParams;
+
+typedef struct {
+  bool keep_dims;
+} TfLiteReducerParams;
+
+typedef struct {
+  int num_splits;
+} TfLiteSplitParams;
+
+typedef struct {
+  int num_splits;
+} TfLiteSplitVParams;
+
+typedef struct {
+  // TODO(ahentz): We can't have dynamic data in this struct, at least not yet.
+  // For now we will fix the maximum possible number of dimensions.
+  int squeeze_dims[8];
+  int num_squeeze_dims;
+} TfLiteSqueezeParams;
+
+typedef struct {
+  int begin_mask;
+  int end_mask;
+  int ellipsis_mask;
+  int new_axis_mask;
+  int shrink_axis_mask;
+} TfLiteStridedSliceParams;
+
+typedef struct {
+  TfLiteType output_type;
+} TfLiteArgMaxParams;
+
+typedef struct {
+  TfLiteType output_type;
+} TfLiteArgMinParams;
+
+typedef struct {
+  TfLitePadding padding;
+  int stride_width;
+  int stride_height;
+} TfLiteTransposeConvParams;
+
+typedef struct {
+  bool validate_indices;
+} TfLiteSparseToDenseParams;
+
+typedef struct {
+  TfLiteType out_type;
+} TfLiteShapeParams;
+
+typedef struct {
+  EmptyStructPlaceholder placeholder;
+} TfLiteRankParams;
+
+typedef struct {
+  // Parameters supported by version 1:
+  float min;
+  float max;
+  int num_bits;
+
+  // Parameters supported by version 2:
+  bool narrow_range;
+} TfLiteFakeQuantParams;
+
+typedef struct {
+  int values_count;
+  int axis;
+} TfLitePackParams;
+
+typedef struct {
+  int axis;
+} TfLiteOneHotParams;
+
+typedef struct {
+  int num;
+  int axis;
+} TfLiteUnpackParams;
+
+typedef struct {
+  float alpha;
+} TfLiteLeakyReluParams;
+
+typedef struct {
+  TfLiteType index_out_type;
+} TfLiteUniqueParams;
+
+typedef struct {
+  int seq_dim;
+  int batch_dim;
+} TfLiteReverseSequenceParams;
+
+typedef struct {
+  EmptyStructPlaceholder placeholder;
+} TfLiteMatrixDiagParams;
+
+typedef struct {
+  EmptyStructPlaceholder placeholder;
+} TfLiteMatrixSetDiagParams;
+
+typedef struct {
+  int then_subgraph_index;
+  int else_subgraph_index;
+} TfLiteIfParams;
+
+typedef struct {
+  int cond_subgraph_index;
+  int body_subgraph_index;
+} TfLiteWhileParams;
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif  // __cplusplus
+
+#endif  // TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_
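
The parameter structs above reach builtin kernels through TfLiteNode::builtin_data (declared in common.h below), which the interpreter fills from the model flatbuffer. A minimal editorial sketch of that hand-off; the function name and the specific checks are illustrative assumptions, not part of this commit:

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"

/* A CONV_2D kernel recovers its parameters by casting builtin_data back to
   TfLiteConvParams as declared above. */
static TfLiteStatus ConvPrepareSketch(TfLiteContext* context,
                                      TfLiteNode* node) {
  const TfLiteConvParams* params = (const TfLiteConvParams*)node->builtin_data;
  TF_LITE_ENSURE(context, params->padding != kTfLitePaddingUnknown);
  /* stride_width/stride_height and the version-2 dilation factors drive the
     output-shape computation in the real kernels. */
  return kTfLiteOk;
}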

+ 232 - 0
tensorflow/lite/micro/tensorflow/lite/c/common.c

@@ -0,0 +1,232 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/c/common.h"
+#ifndef TF_LITE_STATIC_MEMORY
+#include <stdlib.h>
+#include <string.h>
+#endif  // TF_LITE_STATIC_MEMORY
+
+int TfLiteIntArrayGetSizeInBytes(int size) {
+  static TfLiteIntArray dummy;
+  return sizeof(dummy) + sizeof(dummy.data[0]) * size;
+}
+
+int TfLiteIntArrayEqual(const TfLiteIntArray* a, const TfLiteIntArray* b) {
+  if (a == b) return 1;
+  if (a == NULL || b == NULL) return 0;
+  return TfLiteIntArrayEqualsArray(a, b->size, b->data);
+}
+
+int TfLiteIntArrayEqualsArray(const TfLiteIntArray* a, int b_size,
+                              const int b_data[]) {
+  if (a == NULL) return (b_size == 0);
+  if (a->size != b_size) return 0;
+  int i = 0;
+  for (; i < a->size; i++)
+    if (a->data[i] != b_data[i]) return 0;
+  return 1;
+}
+
+#ifndef TF_LITE_STATIC_MEMORY
+
+TfLiteIntArray* TfLiteIntArrayCreate(int size) {
+  TfLiteIntArray* ret =
+      (TfLiteIntArray*)malloc(TfLiteIntArrayGetSizeInBytes(size));
+  ret->size = size;
+  return ret;
+}
+
+TfLiteIntArray* TfLiteIntArrayCopy(const TfLiteIntArray* src) {
+  if (!src) return NULL;
+  TfLiteIntArray* ret = TfLiteIntArrayCreate(src->size);
+  if (ret) {
+    memcpy(ret->data, src->data, src->size * sizeof(int));
+  }
+  return ret;
+}
+
+void TfLiteIntArrayFree(TfLiteIntArray* a) { free(a); }
+
+#endif  // TF_LITE_STATIC_MEMORY
+
+int TfLiteFloatArrayGetSizeInBytes(int size) {
+  static TfLiteFloatArray dummy;
+  return sizeof(dummy) + sizeof(dummy.data[0]) * size;
+}
+
+#ifndef TF_LITE_STATIC_MEMORY
+
+TfLiteFloatArray* TfLiteFloatArrayCreate(int size) {
+  TfLiteFloatArray* ret =
+      (TfLiteFloatArray*)malloc(TfLiteFloatArrayGetSizeInBytes(size));
+  ret->size = size;
+  return ret;
+}
+
+void TfLiteFloatArrayFree(TfLiteFloatArray* a) { free(a); }
+
+void TfLiteTensorDataFree(TfLiteTensor* t) {
+  if (t->allocation_type == kTfLiteDynamic ||
+      t->allocation_type == kTfLitePersistentRo) {
+    free(t->data.raw);
+  }
+  t->data.raw = NULL;
+}
+
+void TfLiteQuantizationFree(TfLiteQuantization* quantization) {
+  if (quantization->type == kTfLiteAffineQuantization) {
+    TfLiteAffineQuantization* q_params =
+        (TfLiteAffineQuantization*)(quantization->params);
+    if (q_params->scale) {
+      TfLiteFloatArrayFree(q_params->scale);
+      q_params->scale = NULL;
+    }
+    if (q_params->zero_point) {
+      TfLiteIntArrayFree(q_params->zero_point);
+      q_params->zero_point = NULL;
+    }
+    free(q_params);
+  }
+  quantization->params = NULL;
+  quantization->type = kTfLiteNoQuantization;
+}
+
+void TfLiteSparsityFree(TfLiteSparsity* sparsity) {
+  if (sparsity == NULL) {
+    return;
+  }
+
+  if (sparsity->traversal_order) {
+    TfLiteIntArrayFree(sparsity->traversal_order);
+    sparsity->traversal_order = NULL;
+  }
+
+  if (sparsity->block_map) {
+    TfLiteIntArrayFree(sparsity->block_map);
+    sparsity->block_map = NULL;
+  }
+
+  if (sparsity->dim_metadata) {
+    int i = 0;
+    for (; i < sparsity->dim_metadata_size; i++) {
+      TfLiteDimensionMetadata metadata = sparsity->dim_metadata[i];
+      if (metadata.format == kTfLiteDimSparseCSR) {
+        TfLiteIntArrayFree(metadata.array_segments);
+        metadata.array_segments = NULL;
+        TfLiteIntArrayFree(metadata.array_indices);
+        metadata.array_indices = NULL;
+      }
+    }
+    free(sparsity->dim_metadata);
+    sparsity->dim_metadata = NULL;
+  }
+
+  free(sparsity);
+}
+
+void TfLiteTensorFree(TfLiteTensor* t) {
+  TfLiteTensorDataFree(t);
+  if (t->dims) TfLiteIntArrayFree(t->dims);
+  t->dims = NULL;
+
+  if (t->dims_signature) {
+    TfLiteIntArrayFree((TfLiteIntArray *) t->dims_signature);
+  }
+  t->dims_signature = NULL;
+
+  TfLiteQuantizationFree(&t->quantization);
+  TfLiteSparsityFree(t->sparsity);
+  t->sparsity = NULL;
+}
+
+void TfLiteTensorReset(TfLiteType type, const char* name, TfLiteIntArray* dims,
+                       TfLiteQuantizationParams quantization, char* buffer,
+                       size_t size, TfLiteAllocationType allocation_type,
+                       const void* allocation, bool is_variable,
+                       TfLiteTensor* tensor) {
+  TfLiteTensorFree(tensor);
+  tensor->type = type;
+  tensor->name = name;
+  tensor->dims = dims;
+  tensor->params = quantization;
+  tensor->data.raw = buffer;
+  tensor->bytes = size;
+  tensor->allocation_type = allocation_type;
+  tensor->allocation = allocation;
+  tensor->is_variable = is_variable;
+
+  tensor->quantization.type = kTfLiteNoQuantization;
+  tensor->quantization.params = NULL;
+}
+
+void TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor) {
+  if (tensor->allocation_type != kTfLiteDynamic &&
+      tensor->allocation_type != kTfLitePersistentRo) {
+    return;
+  }
+  // TODO(b/145340303): Tensor data should be aligned.
+  if (!tensor->data.raw) {
+    tensor->data.raw = malloc(num_bytes);
+  } else if (num_bytes > tensor->bytes) {
+    tensor->data.raw = realloc(tensor->data.raw, num_bytes);
+  }
+  tensor->bytes = num_bytes;
+}
+#endif  // TF_LITE_STATIC_MEMORY
+
+const char* TfLiteTypeGetName(TfLiteType type) {
+  switch (type) {
+    case kTfLiteNoType:
+      return "NOTYPE";
+    case kTfLiteFloat32:
+      return "FLOAT32";
+    case kTfLiteInt16:
+      return "INT16";
+    case kTfLiteInt32:
+      return "INT32";
+    case kTfLiteUInt8:
+      return "UINT8";
+    case kTfLiteInt8:
+      return "INT8";
+    case kTfLiteInt64:
+      return "INT64";
+    case kTfLiteBool:
+      return "BOOL";
+    case kTfLiteComplex64:
+      return "COMPLEX64";
+    case kTfLiteComplex128:
+      return "COMPLEX128";
+    case kTfLiteString:
+      return "STRING";
+    case kTfLiteFloat16:
+      return "FLOAT16";
+    case kTfLiteFloat64:
+      return "FLOAT64";
+  }
+  return "Unknown type";
+}
+
+TfLiteDelegate TfLiteDelegateCreate() {
+  TfLiteDelegate d = {
+      .data_ = NULL,
+      .Prepare = NULL,
+      .CopyFromBufferHandle = NULL,
+      .CopyToBufferHandle = NULL,
+      .FreeBufferHandle = NULL,
+      .flags = kTfLiteDelegateFlagsNone,
+  };
+  return d;
+}
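
A minimal usage sketch for the array helpers above (editorial, not part of the commit; assumes TF_LITE_STATIC_MEMORY is left undefined so the heap-backed creators are compiled in):

#include "tensorflow/lite/c/common.h"

void IntArrayDemo(void) {
  /* A single allocation of sizeof(TfLiteIntArray) + 3 * sizeof(int) bytes,
     as computed by TfLiteIntArrayGetSizeInBytes(3). */
  TfLiteIntArray* dims = TfLiteIntArrayCreate(3);
  dims->data[0] = 1;
  dims->data[1] = 28;
  dims->data[2] = 28;

  const int expected[] = {1, 28, 28};
  int same = TfLiteIntArrayEqualsArray(dims, 3, expected);  /* returns 1 */
  (void)same;

  TfLiteIntArrayFree(dims);
}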

+ 936 - 0
tensorflow/lite/micro/tensorflow/lite/c/common.h

@@ -0,0 +1,936 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// This file defines common C types and APIs for implementing operations,
+// delegates and other constructs in TensorFlow Lite. The actual operations and
+// delegates can be defined using C++, but the interface between the interpreter
+// and the operations are C.
+//
+// Summary of abstractions
+// TF_LITE_ENSURE - Self-sufficient error checking
+// TfLiteStatus - Status reporting
+// TfLiteIntArray - stores tensor shapes (dims),
+// TfLiteContext - allows an op to access the tensors
+// TfLiteTensor - tensor (a multidimensional array)
+// TfLiteNode - a single node or operation
+// TfLiteRegistration - the implementation of a conceptual operation.
+// TfLiteDelegate - allows delegation of nodes to alternative backends.
+//
+// Some abstractions in this file are created and managed by Interpreter.
+//
+// NOTE: The order of values in these structs is "semi-ABI stable". New values
+// should be added only to the end of structs and never reordered.
+
+#ifndef TENSORFLOW_LITE_C_COMMON_H_
+#define TENSORFLOW_LITE_C_COMMON_H_
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif  // __cplusplus
+
+typedef enum TfLiteStatus {
+  kTfLiteOk = 0,
+  kTfLiteError = 1,
+  kTfLiteDelegateError = 2
+} TfLiteStatus;
+
+// The list of external context types known to TF Lite. This list exists solely
+// to avoid conflicts and to ensure ops can share the external contexts they
+// need. Access to the external contexts is controlled by one of the
+// corresponding support files.
+typedef enum TfLiteExternalContextType {
+  kTfLiteEigenContext = 0,       // include eigen_support.h to use.
+  kTfLiteGemmLowpContext = 1,    // include gemm_support.h to use.
+  kTfLiteEdgeTpuContext = 2,     // Placeholder for Edge TPU support.
+  kTfLiteCpuBackendContext = 3,  // include cpu_backend_context.h to use.
+  kTfLiteMaxExternalContexts = 4
+} TfLiteExternalContextType;
+
+// Forward declare so dependent structs and methods can reference these types
+// prior to the struct definitions.
+struct TfLiteContext;
+struct TfLiteDelegate;
+struct TfLiteRegistration;
+
+// An external context is a collection of information unrelated to the TF Lite
+// framework, but useful to a subset of the ops. TF Lite knows very little
+// about the actual contexts, but it keeps a list of them, and is able to
+// refresh them if configurations like the number of recommended threads
+// change.
+typedef struct TfLiteExternalContext {
+  TfLiteExternalContextType type;
+  TfLiteStatus (*Refresh)(struct TfLiteContext* context);
+} TfLiteExternalContext;
+
+#define kTfLiteOptionalTensor (-1)
+
+// Fixed size list of integers. Used for dimensions and inputs/outputs tensor
+// indices
+typedef struct TfLiteIntArray {
+  int size;
+// gcc 6.1+ has a bug where flexible members aren't properly handled
+// https://github.com/google/re2/commit/b94b7cd42e9f02673cd748c1ac1d16db4052514c
+#if (!defined(__clang__) && defined(__GNUC__) && __GNUC__ == 6 && \
+     __GNUC_MINOR__ >= 1) ||                                      \
+    defined(HEXAGON)
+  int data[0];
+#else
+  int data[];
+#endif
+} TfLiteIntArray;
+
+// Given the size (number of elements) in a TfLiteIntArray, calculate its size
+// in bytes.
+int TfLiteIntArrayGetSizeInBytes(int size);
+
+#ifndef TF_LITE_STATIC_MEMORY
+// Create an array of a given `size` (uninitialized entries).
+// This returns a pointer, that you must free using TfLiteIntArrayFree().
+TfLiteIntArray* TfLiteIntArrayCreate(int size);
+#endif
+
+// Check if two intarrays are equal. Returns 1 if they are equal, 0 otherwise.
+int TfLiteIntArrayEqual(const TfLiteIntArray* a, const TfLiteIntArray* b);
+
+// Check if an intarray equals an array. Returns 1 if equals, 0 otherwise.
+int TfLiteIntArrayEqualsArray(const TfLiteIntArray* a, int b_size,
+                              const int b_data[]);
+
+#ifndef TF_LITE_STATIC_MEMORY
+// Create a copy of an array passed as `src`.
+// You are expected to free memory with TfLiteIntArrayFree
+TfLiteIntArray* TfLiteIntArrayCopy(const TfLiteIntArray* src);
+
+// Free memory of array `a`.
+void TfLiteIntArrayFree(TfLiteIntArray* a);
+#endif  // TF_LITE_STATIC_MEMORY
+
+// Fixed size list of floats. Used for per-channel quantization.
+typedef struct TfLiteFloatArray {
+  int size;
+// gcc 6.1+ has a bug where flexible members aren't properly handled
+// https://github.com/google/re2/commit/b94b7cd42e9f02673cd748c1ac1d16db4052514c
+// This also applies to the toolchain used for Qualcomm Hexagon DSPs.
+#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ == 6 && \
+    __GNUC_MINOR__ >= 1
+  float data[0];
+#else
+  float data[];
+#endif
+} TfLiteFloatArray;
+
+// Given the size (number of elements) in a TfLiteFloatArray, calculate its size
+// in bytes.
+int TfLiteFloatArrayGetSizeInBytes(int size);
+
+#ifndef TF_LITE_STATIC_MEMORY
+// Create an array of a given `size` (uninitialized entries).
+// This returns a pointer, that you must free using TfLiteFloatArrayFree().
+TfLiteFloatArray* TfLiteFloatArrayCreate(int size);
+
+// Free memory of array `a`.
+void TfLiteFloatArrayFree(TfLiteFloatArray* a);
+#endif  // TF_LITE_STATIC_MEMORY
+
+// Since we must not depend on any libraries, define a minimal subset of
+// error macros while avoiding names that have pre-conceived meanings like
+// assert and check.
+
+// Try to make all reporting calls through TF_LITE_KERNEL_LOG rather than
+// calling the context->ReportError function directly, so that message strings
+// can be stripped out if the binary size needs to be severely optimized.
+#ifndef TF_LITE_STRIP_ERROR_STRINGS
+#define TF_LITE_KERNEL_LOG(context, ...)            \
+  do {                                              \
+    (context)->ReportError((context), __VA_ARGS__); \
+  } while (false)
+
+#define TF_LITE_MAYBE_KERNEL_LOG(context, ...)        \
+  do {                                                \
+    if ((context) != nullptr) {                       \
+      (context)->ReportError((context), __VA_ARGS__); \
+    }                                                 \
+  } while (false)
+#else  // TF_LITE_STRIP_ERROR_STRINGS
+#define TF_LITE_KERNEL_LOG(context, ...)
+#define TF_LITE_MAYBE_KERNEL_LOG(context, ...)
+#endif  // TF_LITE_STRIP_ERROR_STRINGS
+
+// Check whether value is true, and if not return kTfLiteError from
+// the current function (and report the error string msg).
+#define TF_LITE_ENSURE_MSG(context, value, msg)        \
+  do {                                                 \
+    if (!(value)) {                                    \
+      TF_LITE_KERNEL_LOG((context), __FILE__ " " msg); \
+      return kTfLiteError;                             \
+    }                                                  \
+  } while (0)
+
+// Check whether the value `a` is true, and if not return kTfLiteError from
+// the current function, while also reporting the location of the error.
+#define TF_LITE_ENSURE(context, a)                                      \
+  do {                                                                  \
+    if (!(a)) {                                                         \
+      TF_LITE_KERNEL_LOG((context), "%s:%d %s was not true.", __FILE__, \
+                         __LINE__, #a);                                 \
+      return kTfLiteError;                                              \
+    }                                                                   \
+  } while (0)
+
+#define TF_LITE_ENSURE_STATUS(a) \
+  do {                           \
+    const TfLiteStatus s = (a);  \
+    if (s != kTfLiteOk) {        \
+      return s;                  \
+    }                            \
+  } while (0)
+
+// Check whether the value `a == b` is true, and if not return kTfLiteError from
+// the current function, while also reporting the location of the error.
+// `a` and `b` may be evaluated more than once, so no side effects or
+// extremely expensive computations should be done.
+// NOTE: Use TF_LITE_ENSURE_TYPES_EQ if comparing TfLiteTypes.
+#define TF_LITE_ENSURE_EQ(context, a, b)                                   \
+  do {                                                                     \
+    if ((a) != (b)) {                                                      \
+      TF_LITE_KERNEL_LOG((context), "%s:%d %s != %s (%d != %d)", __FILE__, \
+                         __LINE__, #a, #b, (a), (b));                      \
+      return kTfLiteError;                                                 \
+    }                                                                      \
+  } while (0)
+
+#define TF_LITE_ENSURE_TYPES_EQ(context, a, b)                             \
+  do {                                                                     \
+    if ((a) != (b)) {                                                      \
+      TF_LITE_KERNEL_LOG((context), "%s:%d %s != %s (%s != %s)", __FILE__, \
+                         __LINE__, #a, #b, TfLiteTypeGetName(a),           \
+                         TfLiteTypeGetName(b));                            \
+      return kTfLiteError;                                                 \
+    }                                                                      \
+  } while (0)
+
+#define TF_LITE_ENSURE_OK(context, status) \
+  do {                                     \
+    const TfLiteStatus s = (status);       \
+    if ((s) != kTfLiteOk) {                \
+      return s;                            \
+    }                                      \
+  } while (0)
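// ---------------------------------------------------------------------------
// [Editorial sketch, not part of this commit] A typical kernel Prepare() is
// assembled from the TF_LITE_ENSURE* macros above. NumInputs, GetInput and
// GetOutput below stand in for the usual kernel_util helpers; all names and
// the float32-only check are illustrative assumptions.
//
//   static TfLiteStatus PrepareSketch(TfLiteContext* context,
//                                     TfLiteNode* node) {
//     TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
//     const TfLiteTensor* input = GetInput(context, node, 0);
//     TfLiteTensor* output = GetOutput(context, node, 0);
//     TF_LITE_ENSURE(context, input != NULL);
//     TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
//     TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
//     return kTfLiteOk;  // every failed check above returns kTfLiteError
//   }
// ---------------------------------------------------------------------------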
+
+// Define TFL_CAPI_EXPORT macro to export a function properly with a shared
+// library.
+#ifdef SWIG
+#define TFL_CAPI_EXPORT
+#else
+#if defined(_WIN32)
+#ifdef TFL_COMPILE_LIBRARY
+#define TFL_CAPI_EXPORT __declspec(dllexport)
+#else
+#define TFL_CAPI_EXPORT __declspec(dllimport)
+#endif  // TFL_COMPILE_LIBRARY
+#else
+#define TFL_CAPI_EXPORT __attribute__((visibility("default")))
+#endif  // _WIN32
+#endif  // SWIG
+
+// Single-precision complex data type compatible with the C99 definition.
+typedef struct TfLiteComplex64 {
+  float re, im;  // real and imaginary parts, respectively.
+} TfLiteComplex64;
+
+// Double-precision complex data type compatible with the C99 definition.
+typedef struct TfLiteComplex128 {
+  double re, im;  // real and imaginary parts, respectively.
+} TfLiteComplex128;
+
+// Half precision data type compatible with the C99 definition.
+typedef struct TfLiteFloat16 {
+  uint16_t data;
+} TfLiteFloat16;
+
+// Types supported by tensor
+typedef enum {
+  kTfLiteNoType = 0,
+  kTfLiteFloat32 = 1,
+  kTfLiteInt32 = 2,
+  kTfLiteUInt8 = 3,
+  kTfLiteInt64 = 4,
+  kTfLiteString = 5,
+  kTfLiteBool = 6,
+  kTfLiteInt16 = 7,
+  kTfLiteComplex64 = 8,
+  kTfLiteInt8 = 9,
+  kTfLiteFloat16 = 10,
+  kTfLiteFloat64 = 11,
+  kTfLiteComplex128 = 12,
+} TfLiteType;
+
+// Return the name of a given type, for error reporting purposes.
+const char* TfLiteTypeGetName(TfLiteType type);
+
+// SupportedQuantizationTypes.
+typedef enum TfLiteQuantizationType {
+  // No quantization.
+  kTfLiteNoQuantization = 0,
+  // Affine quantization (with support for per-channel quantization).
+  // Corresponds to TfLiteAffineQuantization.
+  kTfLiteAffineQuantization = 1,
+} TfLiteQuantizationType;
+
+// Structure specifying the quantization used by the tensor, if-any.
+typedef struct TfLiteQuantization {
+  // The type of quantization held by params.
+  TfLiteQuantizationType type;
+  // Holds a reference to one of the quantization param structures specified
+  // below.
+  void* params;
+} TfLiteQuantization;
+
+// Legacy. Will be deprecated in favor of TfLiteAffineQuantization.
+// If per-layer quantization is specified this field will still be populated in
+// addition to TfLiteAffineQuantization.
+// Parameters for asymmetric quantization. Quantized values can be converted
+// back to float using:
+//     real_value = scale * (quantized_value - zero_point)
+typedef struct TfLiteQuantizationParams {
+  float scale;
+  int32_t zero_point;
+} TfLiteQuantizationParams;
+
+// Parameters for asymmetric quantization across a dimension (i.e per output
+// channel quantization).
+// quantized_dimension specifies which dimension the scales and zero_points
+// correspond to.
+// For a particular value in quantized_dimension, quantized values can be
+// converted back to float using:
+//     real_value = scale * (quantized_value - zero_point)
+typedef struct TfLiteAffineQuantization {
+  TfLiteFloatArray* scale;
+  TfLiteIntArray* zero_point;
+  int32_t quantized_dimension;
+} TfLiteAffineQuantization;
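// ---------------------------------------------------------------------------
// [Editorial sketch, not part of this commit] Applying the affine mapping
// documented above, real_value = scale * (quantized_value - zero_point), to a
// single int8 value; the numbers in the example are illustrative only.
//
//   static inline float DequantizeOne(int8_t q, float scale,
//                                     int32_t zero_point) {
//     return scale * (float)((int32_t)q - zero_point);
//   }
//
//   // scale = 0.5f, zero_point = -128, q = -118  ->  0.5f * 10 = 5.0f
// ---------------------------------------------------------------------------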
+
+/* A union of pointers that points to memory for a given tensor. */
+typedef union TfLitePtrUnion {
+  /* Do not access these members directly, if possible, use
+   * GetTensorData<TYPE>(tensor) instead, otherwise only access .data, as other
+   * members are deprecated. */
+  int32_t* i32;
+  int64_t* i64;
+  float* f;
+  TfLiteFloat16* f16;
+  double* f64;
+  char* raw;
+  const char* raw_const;
+  uint8_t* uint8;
+  bool* b;
+  int16_t* i16;
+  TfLiteComplex64* c64;
+  TfLiteComplex128* c128;
+  int8_t* int8;
+  /* Only use this member. */
+  void* data;
+} TfLitePtrUnion;
+
+// Memory allocation strategies.
+//  * kTfLiteMmapRo: Read-only memory-mapped data, or data externally allocated.
+//  * kTfLiteArenaRw: Arena allocated with no guarantees about persistence,
+//        and available during eval.
+//  * kTfLiteArenaRwPersistent: Arena allocated but persistent across eval, and
+//        only available during eval.
+//  * kTfLiteDynamic: Allocated during eval, or for string tensors.
+//  * kTfLitePersistentRo: Allocated and populated during prepare. This is
+//        useful for tensors that can be computed during prepare and treated
+//        as constant inputs for downstream ops (also in prepare).
+typedef enum TfLiteAllocationType {
+  kTfLiteMemNone = 0,
+  kTfLiteMmapRo,
+  kTfLiteArenaRw,
+  kTfLiteArenaRwPersistent,
+  kTfLiteDynamic,
+  kTfLitePersistentRo,
+} TfLiteAllocationType;
+
+// The delegates should use zero or positive integers to represent handles.
+// -1 is reserved for unallocated status.
+typedef int TfLiteBufferHandle;
+enum {
+  kTfLiteNullBufferHandle = -1,
+};
+
+// Storage format of each dimension in a sparse tensor.
+typedef enum TfLiteDimensionType {
+  kTfLiteDimDense = 0,
+  kTfLiteDimSparseCSR,
+} TfLiteDimensionType;
+
+// Metadata to encode each dimension in a sparse tensor.
+typedef struct TfLiteDimensionMetadata {
+  TfLiteDimensionType format;
+  int dense_size;
+  TfLiteIntArray* array_segments;
+  TfLiteIntArray* array_indices;
+} TfLiteDimensionMetadata;
+
+// Parameters used to encode a sparse tensor. For detailed explanation of each
+// field please refer to lite/schema/schema.fbs.
+typedef struct TfLiteSparsity {
+  TfLiteIntArray* traversal_order;
+  TfLiteIntArray* block_map;
+  TfLiteDimensionMetadata* dim_metadata;
+  int dim_metadata_size;
+} TfLiteSparsity;
+
+// A tensor in the interpreter system which is a wrapper around a buffer of
+// data including a dimensionality (or NULL if not currently defined).
+#ifndef TF_LITE_STATIC_MEMORY
+typedef struct TfLiteTensor {
+  // The data type specification for data stored in `data`. This affects
+  // what member of `data` union should be used.
+  TfLiteType type;
+  // A union of data pointers. The appropriate type should be used for a typed
+  // tensor based on `type`.
+  TfLitePtrUnion data;
+  // A pointer to a structure representing the dimensionality interpretation
+  // that the buffer should have. NOTE: the product of elements of `dims`
+  // and the element datatype size should be equal to `bytes` below.
+  TfLiteIntArray* dims;
+  // Quantization information.
+  TfLiteQuantizationParams params;
+  // How memory is mapped
+  //  kTfLiteMmapRo: Memory mapped read only.
+  //  i.e. weights
+  //  kTfLiteArenaRw: Arena allocated read write memory
+  //  (i.e. temporaries, outputs).
+  TfLiteAllocationType allocation_type;
+  // The number of bytes required to store the data of this Tensor. I.e.
+  // (bytes of each element) * dims[0] * ... * dims[n-1].  For example, if
+  // type is kTfLiteFloat32 and dims = {3, 2} then
+  // bytes = sizeof(float) * 3 * 2 = 4 * 3 * 2 = 24.
+  size_t bytes;
+
+  // An opaque pointer to a tflite::MMapAllocation
+  const void* allocation;
+
+  // Null-terminated name of this tensor.
+  const char* name;
+
+  // The delegate which knows how to handle `buffer_handle`.
+  // WARNING: This is an experimental interface that is subject to change.
+  struct TfLiteDelegate* delegate;
+
+  // An integer buffer handle that can be handled by `delegate`.
+  // The value is valid only when delegate is not null.
+  // WARNING: This is an experimental interface that is subject to change.
+  TfLiteBufferHandle buffer_handle;
+
+  // If the delegate uses its own buffer (e.g. GPU memory), the delegate is
+  // responsible to set data_is_stale to true.
+  // `delegate->CopyFromBufferHandle` can be called to copy the data from
+  // delegate buffer.
+  // WARNING: This is an experimental interface that is subject to change.
+  bool data_is_stale;
+
+  // True if the tensor is a variable.
+  bool is_variable;
+
+  // Quantization information. Replaces params field above.
+  TfLiteQuantization quantization;
+
+  // Parameters used to encode a sparse tensor.
+  // This is optional. The field is NULL if a tensor is dense.
+  // WARNING: This is an experimental interface that is subject to change.
+  TfLiteSparsity* sparsity;
+
+  // Optional. Encodes shapes with unknown dimensions with -1. This field is
+  // only populated when unknown dimensions exist in a read-write tensor (i.e.
+  // an input or output tensor). (e.g.  `dims` contains [1, 1, 1, 3] and
+  // `dims_signature` contains [1, -1, -1, 3]).
+  const TfLiteIntArray* dims_signature;
+} TfLiteTensor;
+
+// A structure representing an instance of a node.
+// This structure only exhibits the inputs, outputs and user defined data, not
+// other features like the type.
+typedef struct TfLiteNode {
+  // Inputs to this node expressed as indices into the simulator's tensors.
+  TfLiteIntArray* inputs;
+
+  // Outputs to this node expressed as indices into the simulator's tensors.
+  TfLiteIntArray* outputs;
+
+  // Intermediate tensors to this node expressed as indices into the simulator's
+  // tensors.
+  TfLiteIntArray* intermediates;
+
+  // Temporary tensors used during the computations. This usually contains no
+  // tensors, but ops are allowed to change that if they need scratch space of
+  // any sort.
+  TfLiteIntArray* temporaries;
+
+  // Opaque data provided by the node implementer through `Registration.init`.
+  void* user_data;
+
+  // Opaque data provided to the node if the node is a builtin. This is usually
+  // a structure defined in builtin_op_data.h
+  void* builtin_data;
+
+  // Custom initial data. This is the opaque data provided in the flatbuffer.
+  // WARNING: This is an experimental interface that is subject to change.
+  const void* custom_initial_data;
+  int custom_initial_data_size;
+
+  // The pointer to the delegate. This is non-null only when the node is
+  // created by calling `interpreter.ModifyGraphWithDelegate`.
+  // WARNING: This is an experimental interface that is subject to change.
+  struct TfLiteDelegate* delegate;
+} TfLiteNode;
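
Because `inputs` and `outputs` are indices rather than pointers, kernels
resolve them through the context. A minimal sketch, assuming `context` and
`node` are the arguments TfLite passes into a kernel's prepare/invoke
callbacks:

    // Resolve a node's first input and first output into tensor pointers.
    TfLiteTensor* input = &context->tensors[node->inputs->data[0]];
    TfLiteTensor* output = &context->tensors[node->outputs->data[0]];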
+#else  // defined(TF_LITE_STATIC_MEMORY)
+// NOTE: This flag is opt-in only at compile time.
+//
+// Specific reduced TfLiteTensor struct for the TF Micro runtime. This struct
+// contains only the minimum fields required to initialize and prepare a micro
+// inference graph. The fields in this struct have been ordered from largest
+// to smallest to minimize struct padding (optimal sizeof).
+//
+// This struct does not use:
+// - allocation
+// - buffer_handle
+// - data_is_stale
+// - delegate
+// - dims_signature
+// - name
+// - sparsity
+typedef struct TfLiteTensor {
+  // TODO(b/155784997): Consider consolidating these quantization fields:
+  // Quantization information. Replaces the legacy `params` field below.
+  TfLiteQuantization quantization;
+
+  // Quantization information.
+  TfLiteQuantizationParams params;
+
+  // A union of data pointers. The appropriate type should be used for a typed
+  // tensor based on `type`.
+  TfLitePtrUnion data;
+
+  // A pointer to a structure representing the dimensionality interpretation
+  // that the buffer should have. NOTE: the product of elements of `dims`
+  // and the element datatype size should be equal to `bytes` below.
+  TfLiteIntArray* dims;
+
+  // The number of bytes required to store the data of this Tensor. I.e.
+  // (bytes of each element) * dims[0] * ... * dims[n-1].  For example, if
+  // type is kTfLiteFloat32 and dims = {3, 2} then
+  // bytes = sizeof(float) * 3 * 2 = 4 * 3 * 2 = 24.
+  size_t bytes;
+
+  // The data type specification for data stored in `data`. This affects
+  // what member of `data` union should be used.
+  TfLiteType type;
+
+  // How memory is mapped:
+  //  kTfLiteMmapRo: Memory-mapped, read-only (e.g. weights).
+  //  kTfLiteArenaRw: Arena-allocated, read-write memory
+  //  (e.g. temporaries, outputs).
+  TfLiteAllocationType allocation_type;
+
+  // True if the tensor is a variable.
+  bool is_variable;
+} TfLiteTensor;
+
+// Specific reduced TfLiteNode struct for TF Micro runtime. This struct contains
+// only the minimum fields required to represent a node.
+//
+// This struct does not use:
+// - delegate
+// - intermediates
+// - temporaries
+typedef struct TfLiteNode {
+  // Inputs to this node expressed as indices into the interpreter's tensors.
+  TfLiteIntArray* inputs;
+
+  // Outputs from this node expressed as indices into the interpreter's
+  // tensors.
+  TfLiteIntArray* outputs;
+
+  // Opaque data provided by the node implementer through `Registration.init`.
+  void* user_data;
+
+  // Opaque data provided to the node if the node is a builtin. This is usually
+  // a structure defined in builtin_op_data.h
+  void* builtin_data;
+
+  // Custom initial data. This is the opaque data provided in the flatbuffer.
+  // WARNING: This is an experimental interface that is subject to change.
+  const void* custom_initial_data;
+  int custom_initial_data_size;
+} TfLiteNode;
+#endif  // TF_LITE_STATIC_MEMORY
+
+// Lightweight tensor struct for the TF Micro runtime. Provides the minimal
+// amount of information required for a kernel to run during
+// TfLiteRegistration::Eval.
+// TODO(b/160955687): Move this struct into TF_LITE_STATIC_MEMORY when TFLM
+// builds with this flag by default internally.
+typedef struct TfLiteEvalTensor {
+  // A union of data pointers. The appropriate type should be used for a typed
+  // tensor based on `type`.
+  TfLitePtrUnion data;
+
+  // A pointer to a structure representing the dimensionality interpretation
+  // that the buffer should have.
+  TfLiteIntArray* dims;
+
+  // The data type specification for data stored in `data`. This affects
+  // what member of `data` union should be used.
+  TfLiteType type;
+} TfLiteEvalTensor;
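
A sketch of how a micro kernel might obtain this eval-time view inside its
invoke callback, via the `GetEvalTensor` hook declared on `TfLiteContext`
below (assuming the runtime has populated that hook, and `context`/`node`
are the usual kernel arguments):

    // Fetch the eval-time view of the node's first input.
    const TfLiteEvalTensor* in =
        context->GetEvalTensor(context, node->inputs->data[0]);
    // Valid only when in->type == kTfLiteFloat32.
    const float* in_data = in->data.f;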
+
+#ifndef TF_LITE_STATIC_MEMORY
+// Free data memory of tensor `t`.
+void TfLiteTensorDataFree(TfLiteTensor* t);
+
+// Free quantization data.
+void TfLiteQuantizationFree(TfLiteQuantization* quantization);
+
+// Free sparsity parameters.
+void TfLiteSparsityFree(TfLiteSparsity* sparsity);
+
+// Free memory of tensor `t`.
+void TfLiteTensorFree(TfLiteTensor* t);
+
+// Set all of a tensor's fields (and free any previously allocated data).
+void TfLiteTensorReset(TfLiteType type, const char* name, TfLiteIntArray* dims,
+                       TfLiteQuantizationParams quantization, char* buffer,
+                       size_t size, TfLiteAllocationType allocation_type,
+                       const void* allocation, bool is_variable,
+                       TfLiteTensor* tensor);
+
+// Resize the allocated data of a (dynamic) tensor. Tensors with allocation
+// types other than kTfLiteDynamic will be ignored.
+void TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor);
+#endif  // TF_LITE_STATIC_MEMORY
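
To make the dynamic-allocation path concrete, a minimal sketch (assuming a
hypothetical TfLiteTensor* `t` owned by the interpreter):

    // Grow the payload of a dynamic tensor to 128 bytes. TfLiteTensorRealloc
    // ignores tensors whose allocation_type is not kTfLiteDynamic.
    if (t->allocation_type == kTfLiteDynamic) {
      TfLiteTensorRealloc(128, t);  // updates t->data and t->bytes
    }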
+
+// WARNING: This is an experimental interface that is subject to change.
+//
+// Currently, TfLiteDelegateParams has to be allocated in a way that it's
+// trivially destructible. It will be stored as the `builtin_data` field in
+// the `TfLiteNode` of the delegate node.
+//
+// See also the `CreateDelegateParams` function in `interpreter.cc` for
+// details.
+typedef struct TfLiteDelegateParams {
+  struct TfLiteDelegate* delegate;
+  TfLiteIntArray* nodes_to_replace;
+  TfLiteIntArray* input_tensors;
+  TfLiteIntArray* output_tensors;
+} TfLiteDelegateParams;
+
+typedef struct TfLiteContext {
+  // Number of tensors in the context.
+  size_t tensors_size;
+
+  // The execution plan contains a list of the node indices in execution
+  // order. execution_plan->size is the current number of nodes, and
+  // execution_plan->data[0] is the first node that needs to be run.
+  // TfLiteDelegates can traverse the current execution plan by iterating
+  // through each member of this array and using GetNodeAndRegistration() to
+  // access details about a node. For example:
+  // TfLiteIntArray* execution_plan;
+  // TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &execution_plan));
+  // for (int exec_index = 0; exec_index < execution_plan->size; exec_index++) {
+  //    int node_index = execution_plan->data[exec_index];
+  //    TfLiteNode* node;
+  //    TfLiteRegistration* reg;
+  //    context->GetNodeAndRegistration(context, node_index, &node, &reg);
+  // }
+  // WARNING: This is an experimental interface that is subject to change.
+  TfLiteStatus (*GetExecutionPlan)(struct TfLiteContext* context,
+                                   TfLiteIntArray** execution_plan);
+
+  // An array of tensors in the interpreter context (of length `tensors_size`).
+  TfLiteTensor* tensors;
+
+  // Opaque pointer to the full context (an opaque C++ data structure).
+  void* impl_;
+
+  // Request that the memory pointed to by `tensor` be resized. Updates the
+  // dimensions on the tensor.
+  // NOTE: ResizeTensor takes ownership of `new_size`.
+  TfLiteStatus (*ResizeTensor)(struct TfLiteContext*, TfLiteTensor* tensor,
+                               TfLiteIntArray* new_size);
+  // Request that an error be reported with format string msg.
+  void (*ReportError)(struct TfLiteContext*, const char* msg, ...);
+
+  // Add `tensors_to_add` tensors, preserving pre-existing Tensor entries.  If
+  // non-null, the value pointed to by `first_new_tensor_index` will be set to
+  // the index of the first new tensor.
+  TfLiteStatus (*AddTensors)(struct TfLiteContext*, int tensors_to_add,
+                             int* first_new_tensor_index);
+
+  // Get a node and its registration by node_index.
+  // WARNING: This is an experimental interface that is subject to change.
+  TfLiteStatus (*GetNodeAndRegistration)(
+      struct TfLiteContext*, int node_index, TfLiteNode** node,
+      struct TfLiteRegistration** registration);
+
+  // Replace ops with one or more stub delegate operations. This function
+  // does not take ownership of `nodes_to_replace`.
+  TfLiteStatus (*ReplaceNodeSubsetsWithDelegateKernels)(
+      struct TfLiteContext*, struct TfLiteRegistration registration,
+      const TfLiteIntArray* nodes_to_replace, struct TfLiteDelegate* delegate);
+
+  // Number of threads that are recommended to subsystems like gemmlowp and
+  // eigen.
+  int recommended_num_threads;
+
+  // Access external contexts by type.
+  // WARNING: This is an experimental interface that is subject to change.
+  TfLiteExternalContext* (*GetExternalContext)(struct TfLiteContext*,
+                                               TfLiteExternalContextType);
+  // Set the value of an external context. Does not take ownership of the
+  // pointer.
+  // WARNING: This is an experimental interface that is subject to change.
+  void (*SetExternalContext)(struct TfLiteContext*, TfLiteExternalContextType,
+                             TfLiteExternalContext*);
+
+  // Flag for allowing float16 precision for FP32 calculations.
+  // Default: false.
+  // WARNING: This is an experimental API and subject to change.
+  bool allow_fp32_relax_to_fp16;
+
+  // Pointer to the op-level profiler, if set; nullptr otherwise.
+  void* profiler;
+
+  // Allocate a persistent buffer which has the same lifetime as the
+  // interpreter. Returns nullptr on failure.
+  // The memory is allocated from the heap in TFL, and from the arena tail in
+  // TFLM.
+  // This method is only available in the Init or Prepare stage.
+  // WARNING: This is an experimental interface that is subject to change.
+  void* (*AllocatePersistentBuffer)(struct TfLiteContext* ctx, size_t bytes);
+
+  // Allocate a buffer which will be deallocated right after the invoke phase.
+  // The memory is allocated from the heap in TFL, and from the volatile arena
+  // in TFLM.
+  // This method is only available in the invoke stage.
+  // NOTE: If possible, use the RequestScratchBufferInArena method to avoid
+  // memory allocation during inference time.
+  // WARNING: This is an experimental interface that is subject to change.
+  TfLiteStatus (*AllocateBufferForEval)(struct TfLiteContext* ctx, size_t bytes,
+                                        void** ptr);
+
+  // Request a scratch buffer in the arena through static memory planning.
+  // This method is only available in the Prepare stage; the buffer is
+  // allocated by the interpreter between the Prepare and Eval stages. In the
+  // Eval stage, the GetScratchBuffer API can be used to fetch the address.
+  // WARNING: This is an experimental interface that is subject to change.
+  TfLiteStatus (*RequestScratchBufferInArena)(struct TfLiteContext* ctx,
+                                              size_t bytes, int* buffer_idx);
+
+  // Get the scratch buffer pointer.
+  // This method is only available in the Eval stage.
+  // WARNING: This is an experimental interface that is subject to change.
+  void* (*GetScratchBuffer)(struct TfLiteContext* ctx, int buffer_idx);
+
+  // Resize the memory pointer of the `tensor`. This method behaves the same as
+  // `ResizeTensor`, except that it makes a copy of the shape array internally,
+  // so the shape array can be deallocated right afterwards.
+  // WARNING: This is an experimental interface that is subject to change.
+  TfLiteStatus (*ResizeTensorExplicit)(struct TfLiteContext* ctx,
+                                       TfLiteTensor* tensor, int dims,
+                                       const int* shape);
+
+  // This method provides a preview of post-delegation partitioning. Each
+  // TfLiteDelegateParams in the referenced array corresponds to one instance of
+  // the delegate kernel.
+  // Example usage:
+  //
+  // TfLiteIntArray* nodes_to_replace = ...;
+  // TfLiteDelegateParams* params_array;
+  // int num_partitions = 0;
+  // TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
+  //    context, delegate, nodes_to_replace, &params_array, &num_partitions));
+  // for (int idx = 0; idx < num_partitions; idx++) {
+  //    const auto& partition_params = params_array[idx];
+  //    ...
+  // }
+  //
+  // NOTE: The context owns the memory referenced by partition_params_array. It
+  // will be cleared with another call to PreviewDelegatePartitioning, or after
+  // TfLiteDelegateParams::Prepare returns.
+  //
+  // WARNING: This is an experimental interface that is subject to change.
+  TfLiteStatus (*PreviewDelegatePartitioning)(
+      struct TfLiteContext* context, const TfLiteIntArray* nodes_to_replace,
+      TfLiteDelegateParams** partition_params_array, int* num_partitions);
+
+  // Returns a TfLiteTensor struct for a given index.
+  // WARNING: This is an experimental interface that is subject to change.
+  // WARNING: This method may not be available on all platforms.
+  TfLiteTensor* (*GetTensor)(const struct TfLiteContext* context,
+                             int tensor_idx);
+
+  // Returns a TfLiteEvalTensor struct for a given index.
+  // WARNING: This is an experimental interface that is subject to change.
+  // WARNING: This method may not be available on all platforms.
+  TfLiteEvalTensor* (*GetEvalTensor)(const struct TfLiteContext* context,
+                                     int tensor_idx);
+} TfLiteContext;
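
The scratch-buffer hooks above form a two-phase protocol: reserve in Prepare,
fetch in Eval. A minimal sketch, assuming the kernel keeps `scratch_idx` in
its op data between the two phases:

    // Prepare stage: reserve 1 KiB of arena scratch space, remember the index.
    int scratch_idx = -1;
    TF_LITE_ENSURE_STATUS(
        context->RequestScratchBufferInArena(context, 1024, &scratch_idx));

    // Eval stage: fetch the address the memory planner assigned.
    void* scratch = context->GetScratchBuffer(context, scratch_idx);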
+
+typedef struct TfLiteRegistration {
+  // Initializes the op from serialized data.
+  // If a built-in op:
+  //   `buffer` is the op's params data (TfLiteLSTMParams*).
+  //   `length` is zero.
+  // If custom op:
+  //   `buffer` is the op's `custom_options`.
+  //   `length` is the size of the buffer.
+  //
+  // Returns a type-punned (i.e. void*) opaque data (e.g. a primitive pointer
+  // or an instance of a struct).
+  //
+  // The returned pointer will be stored with the node in the `user_data` field,
+  // accessible within prepare and invoke functions below.
+  // NOTE: if the data is already in the desired format, simply implement this
+  // function to return `nullptr` and implement the free function to be a no-op.
+  void* (*init)(TfLiteContext* context, const char* buffer, size_t length);
+
+  // The pointer `buffer` is the data previously returned by an init invocation.
+  void (*free)(TfLiteContext* context, void* buffer);
+
+  // prepare is called when the inputs this node depends on have been resized.
+  // context->ResizeTensor() can be called to request output tensors to be
+  // resized.
+  //
+  // Returns kTfLiteOk on success.
+  TfLiteStatus (*prepare)(TfLiteContext* context, TfLiteNode* node);
+
+  // Execute the node (should read node->inputs and output to node->outputs).
+  // Returns kTfLiteOk on success.
+  TfLiteStatus (*invoke)(TfLiteContext* context, TfLiteNode* node);
+
+  // profiling_string is called during summarization of profiling information
+  // in order to group executions together. Providing a value here will cause a
+  // given op to appear multiple times in the profiling report. This is
+  // particularly useful for custom ops that can perform significantly
+  // different calculations depending on their `user_data`.
+  const char* (*profiling_string)(const TfLiteContext* context,
+                                  const TfLiteNode* node);
+
+  // Builtin code. If this kernel refers to a builtin, this is the code
+  // of the builtin. This is so we can do marshaling to other frameworks like
+  // the NN API.
+  // Note: It is the responsibility of the registration binder to set this
+  // properly.
+  int32_t builtin_code;
+
+  // Custom op name. If the op is a builtin, this will be null.
+  // Note: It is the responsibility of the registration binder to set this
+  // properly.
+  // WARNING: This is an experimental interface that is subject to change.
+  const char* custom_name;
+
+  // The version of the op.
+  // Note: It is the responsibility of the registration binder to set this
+  // properly.
+  int version;
+} TfLiteRegistration;
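
A sketch of filling in a registration for a stateless custom op;
`MyPrepare` and `MyInvoke` are hypothetical kernel callbacks:

    TfLiteStatus MyPrepare(TfLiteContext* context, TfLiteNode* node);
    TfLiteStatus MyInvoke(TfLiteContext* context, TfLiteNode* node);

    TfLiteRegistration reg = {
        /*init=*/NULL,          // no op-level state to build
        /*free=*/NULL,          // nothing to free
        /*prepare=*/MyPrepare,
        /*invoke=*/MyInvoke,
        /*profiling_string=*/NULL,
        /*builtin_code=*/0,     // custom ops leave the builtin code at 0
        /*custom_name=*/"MyCustomOp",
        /*version=*/1};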
+
+// The flags used in `TfLiteDelegate`. Note that this is a bitmask, so the
+// values should be 1, 2, 4, 8, etc.
+typedef enum TfLiteDelegateFlags {
+  kTfLiteDelegateFlagsNone = 0,
+  // The flag is set if the delegate can handle dynamic-sized tensors.
+  // For example, the output shape of a `Resize` op with non-constant shape
+  // can only be inferred when the op is invoked.
+  // In this case, the delegate is responsible for calling
+  // `SetTensorToDynamic` to mark the tensor as a dynamic tensor, and calling
+  // `ResizeTensor` when invoking the op.
+  //
+  // If the delegate isn't capable of handling dynamic tensors, this flag needs
+  // to be set to false.
+  kTfLiteDelegateFlagsAllowDynamicTensors = 1,
+
+  // This flag can be used by delegates (that allow dynamic tensors) to ensure
+  // applicable tensor shapes are automatically propagated in the case of tensor
+  // resizing.
+  // This means that non-dynamic (allocation_type != kTfLiteDynamic) I/O tensors
+  // of a delegate kernel will have correct shapes before its Prepare() method
+  // is called. The runtime leverages TFLite builtin ops in the original
+  // execution plan to propagate shapes.
+  //
+  // A few points to note:
+  // 1. This requires kTfLiteDelegateFlagsAllowDynamicTensors. If that flag is
+  // false, this one is redundant since the delegate kernels are re-initialized
+  // every time tensors are resized.
+  // 2. Enabling this flag adds some overhead to AllocateTensors(), since extra
+  // work is required to prepare the original execution plan.
+  // 3. This flag requires that the original execution plan only have ops with
+  // valid registrations (and not 'dummy' custom ops like with Flex).
+  // WARNING: This feature is experimental and subject to change.
+  kTfLiteDelegateFlagsRequirePropagatedShapes = 2
+} TfLiteDelegateFlags;
+
+// WARNING: This is an experimental interface that is subject to change.
+typedef struct TfLiteDelegate {
+  // Data that the delegate needs to identify itself. This data is owned by the
+  // delegate. The delegate is owned by the user code, so the delegate is
+  // responsible for deallocating this data when it is destroyed.
+  void* data_;
+
+  // Invoked by ModifyGraphWithDelegate. This Prepare is called, giving the
+  // delegate a view of the current graph through TfLiteContext*. It typically
+  // will look at the nodes and call ReplaceNodeSubsetsWithDelegateKernels()
+  // to ask the TensorFlow Lite runtime to create macro-nodes to represent
+  // delegated subgraphs of the original graph.
+  TfLiteStatus (*Prepare)(TfLiteContext* context,
+                          struct TfLiteDelegate* delegate);
+
+  // Copy the data from the delegate buffer handle into the raw memory of the
+  // given 'tensor'. Note that the delegate is allowed to allocate the raw
+  // bytes as long as it follows the rules for kTfLiteDynamic tensors, in
+  // which case this cannot be null.
+  TfLiteStatus (*CopyFromBufferHandle)(TfLiteContext* context,
+                                       struct TfLiteDelegate* delegate,
+                                       TfLiteBufferHandle buffer_handle,
+                                       TfLiteTensor* tensor);
+
+  // Copy the data from raw memory of the given 'tensor' to delegate buffer
+  // handle. This can be null if the delegate doesn't use its own buffer.
+  TfLiteStatus (*CopyToBufferHandle)(TfLiteContext* context,
+                                     struct TfLiteDelegate* delegate,
+                                     TfLiteBufferHandle buffer_handle,
+                                     TfLiteTensor* tensor);
+
+  // Free the delegate buffer handle. NOTE: This only frees the handle; it
+  // doesn't release the underlying resource (e.g. textures). The resources
+  // are either owned by the application layer or the delegate.
+  // This can be null if the delegate doesn't use its own buffer.
+  void (*FreeBufferHandle)(TfLiteContext* context,
+                           struct TfLiteDelegate* delegate,
+                           TfLiteBufferHandle* handle);
+
+  // Bitmask flags. See the comments in `TfLiteDelegateFlags`.
+  int64_t flags;
+} TfLiteDelegate;
+
+// Build a 'null' delegate, with all the fields properly set to their default
+// values.
+TfLiteDelegate TfLiteDelegateCreate();
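
A sketch of wiring up a minimal delegate from the initializer above, also
showing how the `TfLiteDelegateFlags` bitmask is combined;
`MyDelegatePrepare` is a hypothetical callback:

    TfLiteStatus MyDelegatePrepare(TfLiteContext* context,
                                   struct TfLiteDelegate* delegate);

    TfLiteDelegate delegate = TfLiteDelegateCreate();  // all fields defaulted
    delegate.Prepare = MyDelegatePrepare;
    delegate.flags = kTfLiteDelegateFlagsAllowDynamicTensors |
                     kTfLiteDelegateFlagsRequirePropagatedShapes;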
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif  // __cplusplus
+#endif  // TENSORFLOW_LITE_C_COMMON_H_

+ 16 - 0
tensorflow/lite/micro/tensorflow/lite/core/SConscript

@@ -0,0 +1,16 @@
+# RT-Thread building script for bridge
+
+import os
+from building import *
+
+cwd = GetCurrentDir()
+objs = []
+list = os.listdir(cwd)
+
+if GetDepend('PKG_USING_TENSORFLOWLITEMICRO'):
+    for d in list:
+        path = os.path.join(cwd, d)
+        if os.path.isfile(os.path.join(path, 'SConscript')):
+            objs = objs + SConscript(os.path.join(d, 'SConscript'))
+
+Return('objs')

+ 29 - 0
tensorflow/lite/micro/tensorflow/lite/core/api/SConscript

@@ -0,0 +1,29 @@
+from building import *
+import os
+
+cwd     = GetCurrentDir()
+src     = Glob('*.c') + Glob('*.cc')
+
+#.
+root =  str(Dir('#'))
+packages = os.path.join(root, 'packages')
+file_list = os.listdir(packages)
+for f in file_list:
+    if f.split('-')[0] == 'TensorflowLiteMicro':
+        tflm_pkg = os.path.join(packages, f)
+        break
+#./third_party/flatbuffers/include
+flatbuffer = os.path.join(tflm_pkg, "third_party/flatbuffers/include")
+#./third_party/gemmlowp
+gemmlowp = os.path.join(tflm_pkg, "third_party/gemmlowp")
+#./third_party/kissfft
+kissfft = os.path.join(tflm_pkg, "third_party/kissfft")
+#./third_party/ruy
+ruy = os.path.join(tflm_pkg, "third_party/ruy")
+
+
+CPPPATH = [tflm_pkg, flatbuffer, gemmlowp, kissfft, ruy]
+
+group = DefineGroup('lite', src, depend = ['PKG_USING_TENSORFLOWLITEMICRO'], CPPPATH = CPPPATH)
+
+Return('group')

+ 38 - 0
tensorflow/lite/micro/tensorflow/lite/core/api/error_reporter.cc

@@ -0,0 +1,38 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/core/api/error_reporter.h"
+#include <cstdarg>
+
+namespace tflite {
+
+int ErrorReporter::Report(const char* format, ...) {
+  va_list args;
+  va_start(args, format);
+  int code = Report(format, args);
+  va_end(args);
+  return code;
+}
+
+// TODO(aselle): Make the name of ReportError on context the same, so
+// we can use the ensure functions w/o a context and w/ a reporter.
+int ErrorReporter::ReportError(void*, const char* format, ...) {
+  va_list args;
+  va_start(args, format);
+  int code = Report(format, args);
+  va_end(args);
+  return code;
+}
+
+}  // namespace tflite

+ 59 - 0
tensorflow/lite/micro/tensorflow/lite/core/api/error_reporter.h

@@ -0,0 +1,59 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_CORE_API_ERROR_REPORTER_H_
+#define TENSORFLOW_LITE_CORE_API_ERROR_REPORTER_H_
+
+#include <cstdarg>
+
+namespace tflite {
+
+/// A functor that reports errors to the supporting system. Invoked like
+/// printf.
+///
+/// Usage:
+///  ErrorReporter foo;
+///  foo.Report("test %d", 5);
+/// or
+///  va_list args;
+///  foo.Report("test %d", args); // where args is va_list
+///
+/// Subclass ErrorReporter to provide another reporting destination.
+/// For example, if you have a GUI program, you might redirect to a buffer
+/// that drives a GUI error log box.
+class ErrorReporter {
+ public:
+  virtual ~ErrorReporter() {}
+  virtual int Report(const char* format, va_list args) = 0;
+  int Report(const char* format, ...);
+  int ReportError(void*, const char* format, ...);
+};
+
+}  // namespace tflite
+
+// You should not make bare calls to the error reporter; instead use the
+// TF_LITE_REPORT_ERROR macro, since this allows message strings to be
+// stripped when the binary size has to be optimized. If you are looking to
+// reduce binary size, define TF_LITE_STRIP_ERROR_STRINGS when compiling and
+// every call will be stubbed out, taking no memory.
+#ifndef TF_LITE_STRIP_ERROR_STRINGS
+#define TF_LITE_REPORT_ERROR(reporter, ...)                             \
+  do {                                                                  \
+    static_cast<tflite::ErrorReporter*>(reporter)->Report(__VA_ARGS__); \
+  } while (false)
+#else  // TF_LITE_STRIP_ERROR_STRINGS
+#define TF_LITE_REPORT_ERROR(reporter, ...)
+#endif  // TF_LITE_STRIP_ERROR_STRINGS
+
+#endif  // TENSORFLOW_LITE_CORE_API_ERROR_REPORTER_H_
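
A sketch of the subclassing pattern this header describes; purely
illustrative, since TFLM already ships a MicroErrorReporter for the same
purpose:

    #include <cstdarg>
    #include <cstdio>

    // Formats each report into a fixed buffer and prints it to the console.
    class ConsoleErrorReporter : public tflite::ErrorReporter {
     public:
      int Report(const char* format, va_list args) override {
        char buf[128];
        const int n = vsnprintf(buf, sizeof(buf), format, args);
        puts(buf);
        return n;
      }
    };

    // Usage:
    //   ConsoleErrorReporter reporter;
    //   TF_LITE_REPORT_ERROR(&reporter, "bad tensor index %d", 7);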

+ 1739 - 0
tensorflow/lite/micro/tensorflow/lite/core/api/flatbuffer_conversions.cc

@@ -0,0 +1,1739 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+
+#include "flatbuffers/flatbuffers.h"  // from @flatbuffers
+#include "tensorflow/lite/c/builtin_op_data.h"
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/core/api/error_reporter.h"
+#include "tensorflow/lite/kernels/internal/compatibility.h"
+#include "tensorflow/lite/schema/schema_generated.h"
+
+namespace tflite {
+
+namespace {
+
+// Utility class for safely allocating POD data. This is useful for avoiding
+// leaks in cases where op params are allocated but fail to propagate to the
+// parsed op data (e.g., when model parameters are invalid).
+class SafeBuiltinDataAllocator {
+ public:
+  class BuiltinDataDeleter {
+   public:
+    explicit BuiltinDataDeleter(BuiltinDataAllocator* allocator)
+        : allocator_(allocator) {}
+
+    void operator()(void* data) { allocator_->Deallocate(data); }
+
+   private:
+    BuiltinDataAllocator* allocator_;
+  };
+
+  template <typename T>
+  using BuiltinDataPtr = std::unique_ptr<T, BuiltinDataDeleter>;
+
+  explicit SafeBuiltinDataAllocator(BuiltinDataAllocator* allocator)
+      : allocator_(allocator) {}
+
+  template <typename T>
+  BuiltinDataPtr<T> Allocate() {
+    return BuiltinDataPtr<T>(allocator_->AllocatePOD<T>(),
+                             BuiltinDataDeleter(allocator_));
+  }
+
+ private:
+  BuiltinDataAllocator* allocator_;
+};
+
+// All the Parse functions take some pointers as params, and this function has
+// the common DCHECKs to catch if any of them are nullptr.
+void CheckParsePointerParams(const Operator* op, ErrorReporter* error_reporter,
+                             BuiltinDataAllocator* allocator,
+                             void** builtin_data) {
+  TFLITE_DCHECK(op != nullptr);
+  TFLITE_DCHECK(error_reporter != nullptr);
+  TFLITE_DCHECK(allocator != nullptr);
+  TFLITE_DCHECK(builtin_data != nullptr);
+}
+
+// Copies the contents of the flatbuffer int vector `flat_vector` into the
+// int array `buffer`. `flat_vector` and `buffer` represent the same
+// configuration for a given operation.
+TfLiteStatus FlatBufferIntVectorToArray(
+    int max_size_of_buffer, const flatbuffers::Vector<int32_t>* flat_vector,
+    int* buffer, ErrorReporter* error_reporter, const char* op_name) {
+  if (!flat_vector) {
+    TF_LITE_REPORT_ERROR(error_reporter,
+                         "Input array not provided for operation '%s'.\n",
+                         op_name);
+    return kTfLiteError;
+  } else {
+    size_t num_dimensions = flat_vector->size();
+    if (num_dimensions > max_size_of_buffer / sizeof(int)) {
+      TF_LITE_REPORT_ERROR(
+          error_reporter,
+          "Found too many dimensions in the input array of operation '%s'.\n",
+          op_name);
+      return kTfLiteError;
+    } else {
+      for (size_t i = 0; i < num_dimensions; ++i) {
+        buffer[i] = flat_vector->Get(i);
+      }
+    }
+  }
+  return kTfLiteOk;
+}
+
+// Converts the flatbuffer activation to what is used at runtime.
+TfLiteFusedActivation ConvertActivation(ActivationFunctionType activation) {
+  switch (activation) {
+    case ActivationFunctionType_NONE:
+      return kTfLiteActNone;
+    case ActivationFunctionType_RELU:
+      return kTfLiteActRelu;
+    case ActivationFunctionType_RELU_N1_TO_1:
+      return kTfLiteActReluN1To1;
+    case ActivationFunctionType_RELU6:
+      return kTfLiteActRelu6;
+    case ActivationFunctionType_TANH:
+      return kTfLiteActTanh;
+    case ActivationFunctionType_SIGN_BIT:
+      return kTfLiteActSignBit;
+  }
+  return kTfLiteActNone;
+}
+
+// Converts the flatbuffer padding enum to what is used at runtime.
+TfLitePadding ConvertPadding(Padding padding) {
+  switch (padding) {
+    case Padding_SAME:
+      return kTfLitePaddingSame;
+    case Padding_VALID:
+      return kTfLitePaddingValid;
+  }
+  return kTfLitePaddingUnknown;
+}
+
+#ifndef TF_LITE_STATIC_MEMORY
+TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type,
+                               ErrorReporter* error_reporter,
+                               BuiltinDataAllocator* allocator,
+                               void** builtin_data) {
+  auto parseLSHProjectionType = [](LSHProjectionType type) {
+    switch (type) {
+      case LSHProjectionType_SPARSE:
+        return kTfLiteLshProjectionSparse;
+      case LSHProjectionType_DENSE:
+        return kTfLiteLshProjectionDense;
+      default:
+        return kTfLiteLshProjectionUnknown;
+    }
+  };
+  auto parseCombinerType = [](CombinerType type) {
+    switch (type) {
+      case CombinerType_MEAN:
+        return kTfLiteCombinerTypeMean;
+      case CombinerType_SQRTN:
+        return kTfLiteCombinerTypeSqrtn;
+      case CombinerType_SUM:
+      default:
+        return kTfLiteCombinerTypeSum;
+    }
+  };
+
+  SafeBuiltinDataAllocator safe_allocator(allocator);
+  *builtin_data = nullptr;
+  switch (op_type) {
+    case BuiltinOperator_ABS: {
+      return ParseAbs(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_ADD: {
+      return ParseAdd(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_ARG_MAX: {
+      return ParseArgMax(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_ARG_MIN: {
+      return ParseArgMin(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_AVERAGE_POOL_2D: {
+      return ParsePool(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_CEIL: {
+      return ParseCeil(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_CONCATENATION: {
+      return ParseConcatenation(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_CONV_2D: {
+      return ParseConv2D(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_DEPTHWISE_CONV_2D: {
+      return ParseDepthwiseConv2D(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_DEQUANTIZE: {
+      return ParseDequantize(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_FLOOR: {
+      return ParseFloor(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_FULLY_CONNECTED: {
+      return ParseFullyConnected(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_GREATER: {
+      return ParseGreater(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_GREATER_EQUAL: {
+      return ParseGreaterEqual(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_HARD_SWISH: {
+      return ParseHardSwish(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_L2_NORMALIZATION: {
+      return ParseL2Normalization(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_L2_POOL_2D: {
+      return ParsePool(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_LESS: {
+      return ParseLess(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_LESS_EQUAL: {
+      return ParseLessEqual(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_LOG: {
+      return ParseLog(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_LOGICAL_AND: {
+      return ParseLogicalAnd(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_LOGICAL_NOT: {
+      return ParseLogicalNot(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_LOGICAL_OR: {
+      return ParseLogicalOr(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_LOGISTIC: {
+      return ParseLogistic(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_MAXIMUM: {
+      return ParseMaximum(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_MAX_POOL_2D: {
+      return ParsePool(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_MEAN: {
+      return ParseReducer(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_MINIMUM: {
+      return ParseMinimum(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_MUL: {
+      return ParseMul(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_NEG: {
+      return ParseNeg(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_NOT_EQUAL: {
+      return ParseNotEqual(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_PACK: {
+      return ParsePack(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_PAD: {
+      return ParsePad(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_PADV2: {
+      return ParsePadV2(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_PRELU: {
+      return ParsePrelu(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_QUANTIZE: {
+      return ParseQuantize(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_REDUCE_ANY: {
+      return ParseReducer(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_REDUCE_MAX: {
+      return ParseReducer(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_REDUCE_MIN: {
+      return ParseReducer(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_REDUCE_PROD: {
+      return ParseReducer(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_RELU: {
+      return ParseRelu(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_RELU6: {
+      return ParseRelu6(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_RESHAPE: {
+      return ParseReshape(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_RESIZE_NEAREST_NEIGHBOR: {
+      return ParseResizeNearestNeighbor(op, error_reporter, allocator,
+                                        builtin_data);
+    }
+
+    case BuiltinOperator_ROUND: {
+      return ParseRound(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_RSQRT: {
+      return ParseRsqrt(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_SIN: {
+      return ParseSin(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_SOFTMAX: {
+      return ParseSoftmax(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_SPLIT: {
+      return ParseSplit(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_SQRT: {
+      return ParseSqrt(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_SQUARE: {
+      return ParseSquare(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_STRIDED_SLICE: {
+      return ParseStridedSlice(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_SUB: {
+      return ParseSub(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_SUM: {
+      return ParseReducer(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_SVDF: {
+      return ParseSvdf(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_TANH: {
+      return ParseTanh(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_UNPACK: {
+      return ParseUnpack(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_CAST: {
+      auto params = safe_allocator.Allocate<TfLiteCastParams>();
+      TF_LITE_ENSURE(error_reporter, params != nullptr);
+      if (const auto* schema_params = op->builtin_options_as_CastOptions()) {
+        TF_LITE_ENSURE_STATUS(ConvertTensorType(schema_params->in_data_type(),
+                                                &params->in_data_type,
+                                                error_reporter));
+        TF_LITE_ENSURE_STATUS(ConvertTensorType(schema_params->out_data_type(),
+                                                &params->out_data_type,
+                                                error_reporter));
+      }
+      *builtin_data = params.release();
+      return kTfLiteOk;
+    }
+    case BuiltinOperator_LSH_PROJECTION: {
+      auto params = safe_allocator.Allocate<TfLiteLSHProjectionParams>();
+      TF_LITE_ENSURE(error_reporter, params != nullptr);
+      if (const auto* lshParams =
+              op->builtin_options_as_LSHProjectionOptions()) {
+        params->type = parseLSHProjectionType(lshParams->type());
+      }
+      *builtin_data = params.release();
+      return kTfLiteOk;
+    }
+    case BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN: {
+      auto params = safe_allocator.Allocate<TfLiteSequenceRNNParams>();
+      TF_LITE_ENSURE(error_reporter, params != nullptr);
+      if (const auto* sequence_rnn_params =
+              op->builtin_options_as_SequenceRNNOptions()) {
+        params->activation =
+            ConvertActivation(sequence_rnn_params->fused_activation_function());
+        params->time_major = sequence_rnn_params->time_major();
+        params->asymmetric_quantize_inputs =
+            sequence_rnn_params->asymmetric_quantize_inputs();
+      }
+      *builtin_data = params.release();
+      return kTfLiteOk;
+    }
+    case BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN: {
+      auto params =
+          safe_allocator.Allocate<TfLiteBidirectionalSequenceRNNParams>();
+      TF_LITE_ENSURE(error_reporter, params != nullptr);
+      if (const auto* bidi_sequence_rnn_params =
+              op->builtin_options_as_BidirectionalSequenceRNNOptions()) {
+        params->activation = ConvertActivation(
+            bidi_sequence_rnn_params->fused_activation_function());
+        params->time_major = bidi_sequence_rnn_params->time_major();
+        params->merge_outputs = bidi_sequence_rnn_params->merge_outputs();
+        params->asymmetric_quantize_inputs =
+            bidi_sequence_rnn_params->asymmetric_quantize_inputs();
+      }
+      *builtin_data = params.release();
+      return kTfLiteOk;
+    }
+    case BuiltinOperator_RNN: {
+      auto params = safe_allocator.Allocate<TfLiteRNNParams>();
+      TF_LITE_ENSURE(error_reporter, params != nullptr);
+      if (const auto* rnn_params = op->builtin_options_as_RNNOptions()) {
+        params->activation =
+            ConvertActivation(rnn_params->fused_activation_function());
+        params->asymmetric_quantize_inputs =
+            rnn_params->asymmetric_quantize_inputs();
+      }
+      *builtin_data = params.release();
+      return kTfLiteOk;
+    }
+    case BuiltinOperator_EMBEDDING_LOOKUP_SPARSE: {
+      auto params =
+          safe_allocator.Allocate<TfLiteEmbeddingLookupSparseParams>();
+      TF_LITE_ENSURE(error_reporter, params != nullptr);
+      if (const auto* embedding_params =
+              op->builtin_options_as_EmbeddingLookupSparseOptions()) {
+        params->combiner = parseCombinerType(embedding_params->combiner());
+      }
+      *builtin_data = params.release();
+      return kTfLiteOk;
+    }
+
+    case BuiltinOperator_HASHTABLE_LOOKUP:
+      // no-op.
+      return kTfLiteOk;
+    case BuiltinOperator_DIV: {
+      auto params = safe_allocator.Allocate<TfLiteDivParams>();
+      TF_LITE_ENSURE(error_reporter, params != nullptr);
+      if (const auto* schema_params = op->builtin_options_as_DivOptions()) {
+        params->activation =
+            ConvertActivation(schema_params->fused_activation_function());
+      }
+      *builtin_data = params.release();
+      return kTfLiteOk;
+    }
+    case BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION: {
+      auto params = safe_allocator.Allocate<TfLiteLocalResponseNormParams>();
+      TF_LITE_ENSURE(error_reporter, params != nullptr);
+      if (const auto* schema_params =
+              op->builtin_options_as_LocalResponseNormalizationOptions()) {
+        params->radius = schema_params->radius();
+        params->bias = schema_params->bias();
+        params->alpha = schema_params->alpha();
+        params->beta = schema_params->beta();
+      }
+      *builtin_data = params.release();
+      return kTfLiteOk;
+    }
+    case BuiltinOperator_LSTM: {
+      auto params = safe_allocator.Allocate<TfLiteLSTMParams>();
+      TF_LITE_ENSURE(error_reporter, params != nullptr);
+      if (const auto* lstm_params = op->builtin_options_as_LSTMOptions()) {
+        params->activation =
+            ConvertActivation(lstm_params->fused_activation_function());
+        params->cell_clip = lstm_params->cell_clip();
+        params->proj_clip = lstm_params->proj_clip();
+        switch (lstm_params->kernel_type()) {
+          case LSTMKernelType_FULL:
+            params->kernel_type = kTfLiteLSTMFullKernel;
+            break;
+          case LSTMKernelType_BASIC:
+            params->kernel_type = kTfLiteLSTMBasicKernel;
+            break;
+          default:
+            TF_LITE_REPORT_ERROR(error_reporter,
+                                 "Unhandled LSTM kernel type: %d",
+                                 lstm_params->kernel_type());
+            return kTfLiteError;
+        }
+        params->asymmetric_quantize_inputs =
+            lstm_params->asymmetric_quantize_inputs();
+      } else {
+        TF_LITE_REPORT_ERROR(error_reporter,
+                             "No valid LSTM builtin options exist");
+        return kTfLiteError;
+      }
+      *builtin_data = params.release();
+      return kTfLiteOk;
+    }
+    case BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM: {
+      auto params =
+          safe_allocator.Allocate<TfLiteUnidirectionalSequenceLSTMParams>();
+      TF_LITE_ENSURE(error_reporter, params != nullptr);
+      if (const auto* seq_lstm_params =
+              op->builtin_options_as_UnidirectionalSequenceLSTMOptions()) {
+        params->activation =
+            ConvertActivation(seq_lstm_params->fused_activation_function());
+        params->cell_clip = seq_lstm_params->cell_clip();
+        params->proj_clip = seq_lstm_params->proj_clip();
+        params->time_major = seq_lstm_params->time_major();
+        params->asymmetric_quantize_inputs =
+            seq_lstm_params->asymmetric_quantize_inputs();
+      }
+      *builtin_data = params.release();
+      return kTfLiteOk;
+    }
+    case BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM: {
+      auto params =
+          safe_allocator.Allocate<TfLiteBidirectionalSequenceLSTMParams>();
+      TF_LITE_ENSURE(error_reporter, params != nullptr);
+      if (const auto* bidi_lstm_params =
+              op->builtin_options_as_BidirectionalSequenceLSTMOptions()) {
+        params->activation =
+            ConvertActivation(bidi_lstm_params->fused_activation_function());
+        params->cell_clip = bidi_lstm_params->cell_clip();
+        params->proj_clip = bidi_lstm_params->proj_clip();
+        params->merge_outputs = bidi_lstm_params->merge_outputs();
+        params->time_major = bidi_lstm_params->time_major();
+        params->asymmetric_quantize_inputs =
+            bidi_lstm_params->asymmetric_quantize_inputs();
+      }
+      *builtin_data = params.release();
+      return kTfLiteOk;
+    }
+    case BuiltinOperator_RESIZE_BILINEAR: {
+      auto params = safe_allocator.Allocate<TfLiteResizeBilinearParams>();
+      TF_LITE_ENSURE(error_reporter, params != nullptr);
+      if (const auto* schema_params =
+              op->builtin_options_as_ResizeBilinearOptions()) {
+        params->align_corners = schema_params->align_corners();
+        params->half_pixel_centers = schema_params->half_pixel_centers();
+      } else {
+        // Some older models did not populate the ResizeBilinearOptions field in
+        // the flatbuffer, so ensure it's set to a sensible default.
+        params->align_corners = false;
+        params->half_pixel_centers = false;
+      }
+      *builtin_data = params.release();
+      return kTfLiteOk;
+    }
+    case BuiltinOperator_SKIP_GRAM: {
+      auto params = safe_allocator.Allocate<TfLiteSkipGramParams>();
+      TF_LITE_ENSURE(error_reporter, params != nullptr);
+      if (const auto* skip_gram_params =
+              op->builtin_options_as_SkipGramOptions()) {
+        params->ngram_size = skip_gram_params->ngram_size();
+        params->max_skip_size = skip_gram_params->max_skip_size();
+        params->include_all_ngrams = skip_gram_params->include_all_ngrams();
+      }
+      *builtin_data = params.release();
+      return kTfLiteOk;
+    }
+    case BuiltinOperator_SPACE_TO_DEPTH: {
+      auto params = safe_allocator.Allocate<TfLiteSpaceToDepthParams>();
+      TF_LITE_ENSURE(error_reporter, params != nullptr);
+      if (const auto* schema_params =
+              op->builtin_options_as_SpaceToDepthOptions()) {
+        params->block_size = schema_params->block_size();
+      }
+      *builtin_data = params.release();
+      return kTfLiteOk;
+    }
+    case BuiltinOperator_DEPTH_TO_SPACE: {
+      auto params = safe_allocator.Allocate<TfLiteDepthToSpaceParams>();
+      TF_LITE_ENSURE(error_reporter, params != nullptr);
+      if (const auto* schema_params =
+              op->builtin_options_as_DepthToSpaceOptions()) {
+        params->block_size = schema_params->block_size();
+      }
+      *builtin_data = params.release();
+      return kTfLiteOk;
+    }
+    case BuiltinOperator_GATHER: {
+      auto params = safe_allocator.Allocate<TfLiteGatherParams>();
+      TF_LITE_ENSURE(error_reporter, params != nullptr);
+      params->axis = 0;
+      if (const auto* gather_params = op->builtin_options_as_GatherOptions()) {
+        params->axis = gather_params->axis();
+      }
+
+      *builtin_data = params.release();
+      return kTfLiteOk;
+    }
+    case BuiltinOperator_SPLIT_V: {
+      auto params = safe_allocator.Allocate<TfLiteSplitParams>();
+      TF_LITE_ENSURE(error_reporter, params != nullptr);
+      if (const auto* schema_params = op->builtin_options_as_SplitVOptions()) {
+        params->num_splits = schema_params->num_splits();
+      }
+      *builtin_data = params.release();
+      return kTfLiteOk;
+    }
+    case BuiltinOperator_SQUEEZE: {
+      auto params = safe_allocator.Allocate<TfLiteSqueezeParams>();
+      TF_LITE_ENSURE(error_reporter, params != nullptr);
+      if (const auto* schema_params = op->builtin_options_as_SqueezeOptions()) {
+        const auto* squeeze_dims = schema_params->squeeze_dims();
+        TF_LITE_ENSURE_STATUS(FlatBufferIntVectorToArray(
+            sizeof(params->squeeze_dims), squeeze_dims, params->squeeze_dims,
+            error_reporter, "squeeze"));
+        params->num_squeeze_dims = squeeze_dims->size();
+      }
+      *builtin_data = params.release();
+      return kTfLiteOk;
+    }
+    case BuiltinOperator_TRANSPOSE_CONV: {
+      auto params = safe_allocator.Allocate<TfLiteTransposeConvParams>();
+      TF_LITE_ENSURE(error_reporter, params != nullptr);
+      if (const auto* transpose_conv_params =
+              op->builtin_options_as_TransposeConvOptions()) {
+        params->padding = ConvertPadding(transpose_conv_params->padding());
+        params->stride_width = transpose_conv_params->stride_w();
+        params->stride_height = transpose_conv_params->stride_h();
+      }
+      *builtin_data = params.release();
+      return kTfLiteOk;
+    }
+    case BuiltinOperator_SPARSE_TO_DENSE: {
+      auto params = safe_allocator.Allocate<TfLiteSparseToDenseParams>();
+      TF_LITE_ENSURE(error_reporter, params != nullptr);
+      if (const auto* sparse_to_dense_params =
+              op->builtin_options_as_SparseToDenseOptions()) {
+        params->validate_indices = sparse_to_dense_params->validate_indices();
+      }
+      *builtin_data = params.release();
+      return kTfLiteOk;
+    }
+    case BuiltinOperator_SHAPE: {
+      auto params = safe_allocator.Allocate<TfLiteShapeParams>();
+      TF_LITE_ENSURE(error_reporter, params != nullptr);
+      if (const auto* schema_params = op->builtin_options_as_ShapeOptions()) {
+        TF_LITE_ENSURE_STATUS(ConvertTensorType(
+            schema_params->out_type(), &params->out_type, error_reporter));
+      }
+      *builtin_data = params.release();
+      return kTfLiteOk;
+    }
+    case BuiltinOperator_DELEGATE: {
+      // TODO(ycling): Revisit when supporting saving delegated models.
+      TF_LITE_REPORT_ERROR(error_reporter,
+                           "DELEGATE op shouldn't exist in model.");
+      return kTfLiteError;
+    }
+    case BuiltinOperator_FAKE_QUANT: {
+      auto params = safe_allocator.Allocate<TfLiteFakeQuantParams>();
+      TF_LITE_ENSURE(error_reporter, params != nullptr);
+      if (const auto* schema_params =
+              op->builtin_options_as_FakeQuantOptions()) {
+        params->min = schema_params->min();
+        params->max = schema_params->max();
+        params->num_bits = schema_params->num_bits();
+        params->narrow_range = schema_params->narrow_range();
+      }
+      *builtin_data = params.release();
+      return kTfLiteOk;
+    }
+    case BuiltinOperator_ONE_HOT: {
+      auto params = safe_allocator.Allocate<TfLiteOneHotParams>();
+      TF_LITE_ENSURE(error_reporter, params != nullptr);
+      if (const auto* schema_params = op->builtin_options_as_OneHotOptions()) {
+        params->axis = schema_params->axis();
+      }
+      *builtin_data = params.release();
+      return kTfLiteOk;
+    }
+    case BuiltinOperator_LEAKY_RELU: {
+      auto params = safe_allocator.Allocate<TfLiteLeakyReluParams>();
+      TF_LITE_ENSURE(error_reporter, params != nullptr);
+      if (const auto* leaky_relu_params =
+              op->builtin_options_as_LeakyReluOptions()) {
+        params->alpha = leaky_relu_params->alpha();
+      }
+      *builtin_data = params.release();
+      return kTfLiteOk;
+    }
+    case BuiltinOperator_MIRROR_PAD: {
+      auto params = safe_allocator.Allocate<TfLiteMirrorPaddingParams>();
+      TF_LITE_ENSURE(error_reporter, params != nullptr);
+      const auto* mirror_pad_params = op->builtin_options_as_MirrorPadOptions();
+      if (mirror_pad_params != nullptr) {
+        params->mode =
+            mirror_pad_params->mode() == tflite::MirrorPadMode_REFLECT
+                ? TfLiteMirrorPaddingMode::kTfLiteMirrorPaddingReflect
+                : TfLiteMirrorPaddingMode::kTfLiteMirrorPaddingSymmetric;
+      }
+      *builtin_data = params.release();
+      return kTfLiteOk;
+    }
+    case BuiltinOperator_UNIQUE: {
+      auto params = safe_allocator.Allocate<TfLiteUniqueParams>();
+      TF_LITE_ENSURE(error_reporter, params != nullptr);
+      const auto* unique_params = op->builtin_options_as_UniqueOptions();
+      if (unique_params != nullptr) {
+        params->index_out_type =
+            unique_params->idx_out_type() == tflite::TensorType_INT64
+                ? TfLiteType::kTfLiteInt64
+                : TfLiteType::kTfLiteInt32;
+      }
+      *builtin_data = params.release();
+      return kTfLiteOk;
+    }
+    case BuiltinOperator_REVERSE_SEQUENCE: {
+      auto params = safe_allocator.Allocate<TfLiteReverseSequenceParams>();
+      TF_LITE_ENSURE(error_reporter, params != nullptr);
+      if (const auto* reverse_seq_params =
+              op->builtin_options_as_ReverseSequenceOptions()) {
+        params->seq_dim = reverse_seq_params->seq_dim();
+        params->batch_dim = reverse_seq_params->batch_dim();
+      }
+      *builtin_data = params.release();
+      return kTfLiteOk;
+    }
+    case BuiltinOperator_IF: {
+      auto params = safe_allocator.Allocate<TfLiteIfParams>();
+      TF_LITE_ENSURE(error_reporter, params != nullptr);
+      if (const auto* if_params = op->builtin_options_as_IfOptions()) {
+        params->then_subgraph_index = if_params->then_subgraph_index();
+        params->else_subgraph_index = if_params->else_subgraph_index();
+      }
+      *builtin_data = params.release();
+      return kTfLiteOk;
+    }
+    case BuiltinOperator_WHILE: {
+      auto params = safe_allocator.Allocate<TfLiteWhileParams>();
+      TF_LITE_ENSURE(error_reporter, params != nullptr);
+      if (const auto* while_params = op->builtin_options_as_WhileOptions()) {
+        params->cond_subgraph_index = while_params->cond_subgraph_index();
+        params->body_subgraph_index = while_params->body_subgraph_index();
+      }
+      *builtin_data = params.release();
+      return kTfLiteOk;
+    }
+    case BuiltinOperator_BATCH_MATMUL: {
+      auto params = safe_allocator.Allocate<TfLiteBatchMatMulParams>();
+      TF_LITE_ENSURE(error_reporter, params != nullptr);
+      if (const auto* bmm_params =
+              op->builtin_options_as_BatchMatMulOptions()) {
+        params->adj_x = bmm_params->adj_x();
+        params->adj_y = bmm_params->adj_y();
+      }
+      *builtin_data = params.release();
+      return kTfLiteOk;
+    }
+    // Below are the ops with no builtin_data structure.
+    case BuiltinOperator_BATCH_TO_SPACE_ND:
+    // TODO(aselle): Implement call in BuiltinOptions, but nullptrs are
+    // ok for now, since there is no call implementation either.
+    case BuiltinOperator_CALL:
+    case BuiltinOperator_CONCAT_EMBEDDINGS:
+    case BuiltinOperator_COS:
+    case BuiltinOperator_CUSTOM:
+    case BuiltinOperator_ELU:
+    case BuiltinOperator_EMBEDDING_LOOKUP:
+    case BuiltinOperator_EQUAL:
+    case BuiltinOperator_EXP:
+    case BuiltinOperator_EXPAND_DIMS:
+    case BuiltinOperator_LOG_SOFTMAX:
+    case BuiltinOperator_MATRIX_DIAG:
+    case BuiltinOperator_MATRIX_SET_DIAG:
+    case BuiltinOperator_RELU_N1_TO_1:
+    case BuiltinOperator_SELECT:
+    case BuiltinOperator_SELECT_V2:
+    case BuiltinOperator_SLICE:
+    case BuiltinOperator_SPACE_TO_BATCH_ND:
+    case BuiltinOperator_TILE:
+    case BuiltinOperator_TOPK_V2:
+    case BuiltinOperator_TRANSPOSE:
+    case BuiltinOperator_POW:
+    case BuiltinOperator_FLOOR_DIV:
+    case BuiltinOperator_ZEROS_LIKE:
+    case BuiltinOperator_FILL:
+    case BuiltinOperator_FLOOR_MOD:
+    case BuiltinOperator_RANGE:
+    case BuiltinOperator_SQUARED_DIFFERENCE:
+    case BuiltinOperator_REVERSE_V2:
+    case BuiltinOperator_ADD_N:
+    case BuiltinOperator_GATHER_ND:
+    case BuiltinOperator_WHERE:
+    case BuiltinOperator_RANK:
+    case BuiltinOperator_NON_MAX_SUPPRESSION_V4:
+    case BuiltinOperator_NON_MAX_SUPPRESSION_V5:
+    case BuiltinOperator_SCATTER_ND:
+    case BuiltinOperator_DENSIFY:
+    case BuiltinOperator_SEGMENT_SUM:
+      return kTfLiteOk;
+  }
+  return kTfLiteError;
+}  // NOLINT[readability/fn_size]
+#endif  // !defined(TF_LITE_STATIC_MEMORY)
+}  // namespace
+
+TfLiteStatus ConvertTensorType(TensorType tensor_type, TfLiteType* type,
+                               ErrorReporter* error_reporter) {
+  switch (tensor_type) {
+    case TensorType_FLOAT16:
+      *type = kTfLiteFloat16;
+      return kTfLiteOk;
+    case TensorType_FLOAT32:
+      *type = kTfLiteFloat32;
+      return kTfLiteOk;
+    case TensorType_FLOAT64:
+      *type = kTfLiteFloat64;
+      return kTfLiteOk;
+    case TensorType_INT16:
+      *type = kTfLiteInt16;
+      return kTfLiteOk;
+    case TensorType_INT32:
+      *type = kTfLiteInt32;
+      return kTfLiteOk;
+    case TensorType_UINT8:
+      *type = kTfLiteUInt8;
+      return kTfLiteOk;
+    case TensorType_INT8:
+      *type = kTfLiteInt8;
+      return kTfLiteOk;
+    case TensorType_INT64:
+      *type = kTfLiteInt64;
+      return kTfLiteOk;
+    case TensorType_STRING:
+      *type = kTfLiteString;
+      return kTfLiteOk;
+    case TensorType_BOOL:
+      *type = kTfLiteBool;
+      return kTfLiteOk;
+    case TensorType_COMPLEX64:
+      *type = kTfLiteComplex64;
+      return kTfLiteOk;
+    case TensorType_COMPLEX128:
+      *type = kTfLiteComplex128;
+      return kTfLiteOk;
+    default:
+      *type = kTfLiteNoType;
+      TF_LITE_REPORT_ERROR(error_reporter,
+                           "Unsupported data type %d in tensor\n", tensor_type);
+      return kTfLiteError;
+  }
+}
+
+// We have this parse function instead of directly returning kTfLiteOk from the
+// switch-case in ParseOpData because this function is used as part of the
+// selective registration for the OpResolver implementation in micro.
+TfLiteStatus ParseAbs(const Operator*, ErrorReporter*, BuiltinDataAllocator*,
+                      void**) {
+  return kTfLiteOk;
+}
+
+TfLiteStatus ParseAdd(const Operator* op, ErrorReporter* error_reporter,
+                      BuiltinDataAllocator* allocator, void** builtin_data) {
+  CheckParsePointerParams(op, error_reporter, allocator, builtin_data);
+
+  SafeBuiltinDataAllocator safe_allocator(allocator);
+  std::unique_ptr<TfLiteAddParams, SafeBuiltinDataAllocator::BuiltinDataDeleter>
+      params = safe_allocator.Allocate<TfLiteAddParams>();
+  TF_LITE_ENSURE(error_reporter, params != nullptr);
+
+  const AddOptions* schema_params = op->builtin_options_as_AddOptions();
+
+  if (schema_params != nullptr) {
+    params->activation =
+        ConvertActivation(schema_params->fused_activation_function());
+    params->pot_scale_int16 = schema_params->pot_scale_int16();
+  } else {
+    // TODO(b/157480169): We should either return kTfLiteError or fill in some
+    // reasonable defaults in the params struct. We are not doing so until we
+    // better understand the ramifications of changing the legacy behavior.
+  }
+
+  *builtin_data = params.release();
+  return kTfLiteOk;
+}
+
+TfLiteStatus ParseArgMax(const Operator* op, ErrorReporter* error_reporter,
+                         BuiltinDataAllocator* allocator, void** builtin_data) {
+  CheckParsePointerParams(op, error_reporter, allocator, builtin_data);
+
+  SafeBuiltinDataAllocator safe_allocator(allocator);
+  std::unique_ptr<TfLiteArgMaxParams,
+                  SafeBuiltinDataAllocator::BuiltinDataDeleter>
+      params = safe_allocator.Allocate<TfLiteArgMaxParams>();
+  TF_LITE_ENSURE(error_reporter, params != nullptr);
+
+  const ArgMaxOptions* schema_params = op->builtin_options_as_ArgMaxOptions();
+
+  if (schema_params != nullptr) {
+    TF_LITE_ENSURE_STATUS(ConvertTensorType(
+        schema_params->output_type(), &params->output_type, error_reporter));
+  } else {
+    // TODO(b/157480169): We should either return kTfLiteError or fill in some
+    // reasonable defaults in the params struct. We are not doing so until we
+    // better understand the ramifications of changing the legacy behavior.
+  }
+
+  *builtin_data = params.release();
+  return kTfLiteOk;
+}
+
+TfLiteStatus ParseArgMin(const Operator* op, ErrorReporter* error_reporter,
+                         BuiltinDataAllocator* allocator, void** builtin_data) {
+  CheckParsePointerParams(op, error_reporter, allocator, builtin_data);
+
+  SafeBuiltinDataAllocator safe_allocator(allocator);
+  std::unique_ptr<TfLiteArgMinParams,
+                  SafeBuiltinDataAllocator::BuiltinDataDeleter>
+      params = safe_allocator.Allocate<TfLiteArgMinParams>();
+  TF_LITE_ENSURE(error_reporter, params != nullptr);
+
+  const ArgMinOptions* schema_params = op->builtin_options_as_ArgMinOptions();
+
+  if (schema_params != nullptr) {
+    TF_LITE_ENSURE_STATUS(ConvertTensorType(
+        schema_params->output_type(), &params->output_type, error_reporter));
+  } else {
+    // TODO(b/157480169): We should either return kTfLiteError or fill in some
+    // reasonable defaults in the params struct. We are not doing so until we
+    // better understand the ramifications of changing the legacy behavior.
+  }
+
+  *builtin_data = params.release();
+  return kTfLiteOk;
+}
+
+// We have this parse function instead of directly returning kTfLiteOk from the
+// switch-case in ParseOpData because this function is used as part of the
+// selective registration for the OpResolver implementation in micro.
+TfLiteStatus ParseCeil(const Operator*, ErrorReporter*, BuiltinDataAllocator*,
+                       void**) {
+  return kTfLiteOk;
+}
+
+TfLiteStatus ParseConcatenation(const Operator* op,
+                                ErrorReporter* error_reporter,
+                                BuiltinDataAllocator* allocator,
+                                void** builtin_data) {
+  CheckParsePointerParams(op, error_reporter, allocator, builtin_data);
+
+  SafeBuiltinDataAllocator safe_allocator(allocator);
+  std::unique_ptr<TfLiteConcatenationParams,
+                  SafeBuiltinDataAllocator::BuiltinDataDeleter>
+      params = safe_allocator.Allocate<TfLiteConcatenationParams>();
+  TF_LITE_ENSURE(error_reporter, params != nullptr);
+
+  const ConcatenationOptions* schema_params =
+      op->builtin_options_as_ConcatenationOptions();
+
+  if (schema_params != nullptr) {
+    params->activation =
+        ConvertActivation(schema_params->fused_activation_function());
+    params->axis = schema_params->axis();
+  } else {
+    // TODO(b/157480169): We should either return kTfLiteError or fill in some
+    // reasonable defaults in the params struct. We are not doing so until we
+    // better understand the ramifications of changing the legacy behavior.
+  }
+
+  *builtin_data = params.release();
+  return kTfLiteOk;
+}
+
+TfLiteStatus ParseConv2D(const Operator* op, ErrorReporter* error_reporter,
+                         BuiltinDataAllocator* allocator, void** builtin_data) {
+  CheckParsePointerParams(op, error_reporter, allocator, builtin_data);
+
+  SafeBuiltinDataAllocator safe_allocator(allocator);
+  std::unique_ptr<TfLiteConvParams,
+                  SafeBuiltinDataAllocator::BuiltinDataDeleter>
+      params = safe_allocator.Allocate<TfLiteConvParams>();
+  TF_LITE_ENSURE(error_reporter, params != nullptr);
+
+  const Conv2DOptions* schema_params = op->builtin_options_as_Conv2DOptions();
+
+  if (schema_params != nullptr) {
+    params->padding = ConvertPadding(schema_params->padding());
+    params->stride_width = schema_params->stride_w();
+    params->stride_height = schema_params->stride_h();
+    params->activation =
+        ConvertActivation(schema_params->fused_activation_function());
+
+    params->dilation_width_factor = schema_params->dilation_w_factor();
+    params->dilation_height_factor = schema_params->dilation_h_factor();
+  } else {
+    // TODO(b/157480169): We should either return kTfLiteError or fill in some
+    // reasonable defaults in the params struct. We are not doing so until we
+    // better understand the ramifications of changing the legacy behavior.
+  }
+
+  *builtin_data = params.release();
+  return kTfLiteOk;
+}
+
+// We have this parse function instead of directly returning kTfLiteOk from the
+// switch-case in ParseOpData because this function is used as part of the
+// selective registration for the OpResolver implementation in micro.
+TfLiteStatus ParseCos(const Operator*, ErrorReporter*, BuiltinDataAllocator*,
+                      void**) {
+  return kTfLiteOk;
+}
+
+TfLiteStatus ParseDepthwiseConv2D(const Operator* op,
+                                  ErrorReporter* error_reporter,
+                                  BuiltinDataAllocator* allocator,
+                                  void** builtin_data) {
+  CheckParsePointerParams(op, error_reporter, allocator, builtin_data);
+
+  SafeBuiltinDataAllocator safe_allocator(allocator);
+
+  std::unique_ptr<TfLiteDepthwiseConvParams,
+                  SafeBuiltinDataAllocator::BuiltinDataDeleter>
+      params = safe_allocator.Allocate<TfLiteDepthwiseConvParams>();
+  TF_LITE_ENSURE(error_reporter, params != nullptr);
+
+  const DepthwiseConv2DOptions* schema_params =
+      op->builtin_options_as_DepthwiseConv2DOptions();
+
+  if (schema_params != nullptr) {
+    params->padding = ConvertPadding(schema_params->padding());
+    params->stride_width = schema_params->stride_w();
+    params->stride_height = schema_params->stride_h();
+    params->depth_multiplier = schema_params->depth_multiplier();
+    params->activation =
+        ConvertActivation(schema_params->fused_activation_function());
+
+    params->dilation_width_factor = schema_params->dilation_w_factor();
+    params->dilation_height_factor = schema_params->dilation_h_factor();
+  } else {
+    // TODO(b/157480169): We should either return kTfLiteError or fill in some
+    // reasonable defaults in the params struct. We are not doing so until we
+    // better understand the ramifications of changing the legacy behavior.
+  }
+
+  *builtin_data = params.release();
+  return kTfLiteOk;
+}
+
+// We have this parse function instead of directly returning kTfLiteOk from the
+// switch-case in ParseOpData because this function is used as part of the
+// selective registration for the OpResolver implementation in micro.
+TfLiteStatus ParseDequantize(const Operator*, ErrorReporter*,
+                             BuiltinDataAllocator*, void**) {
+  return kTfLiteOk;
+}
+
+// We have this parse function instead of directly returning kTfLiteOk from the
+// switch-case in ParseOpData because this function is used as part of the
+// selective registration for the OpResolver implementation in micro.
+TfLiteStatus ParseEqual(const Operator*, ErrorReporter*, BuiltinDataAllocator*,
+                        void**) {
+  return kTfLiteOk;
+}
+
+// We have this parse function instead of directly returning kTfLiteOk from the
+// switch-case in ParseOpData because this function is used as part of the
+// selective registration for the OpResolver implementation in micro.
+TfLiteStatus ParseFloor(const Operator*, ErrorReporter*, BuiltinDataAllocator*,
+                        void**) {
+  return kTfLiteOk;
+}
+
+TfLiteStatus ParseFullyConnected(const Operator* op,
+                                 ErrorReporter* error_reporter,
+                                 BuiltinDataAllocator* allocator,
+                                 void** builtin_data) {
+  CheckParsePointerParams(op, error_reporter, allocator, builtin_data);
+
+  SafeBuiltinDataAllocator safe_allocator(allocator);
+
+  std::unique_ptr<TfLiteFullyConnectedParams,
+                  SafeBuiltinDataAllocator::BuiltinDataDeleter>
+      params = safe_allocator.Allocate<TfLiteFullyConnectedParams>();
+  TF_LITE_ENSURE(error_reporter, params != nullptr);
+
+  const FullyConnectedOptions* schema_params =
+      op->builtin_options_as_FullyConnectedOptions();
+
+  if (schema_params != nullptr) {
+    params->activation =
+        ConvertActivation(schema_params->fused_activation_function());
+    params->keep_num_dims = schema_params->keep_num_dims();
+    params->asymmetric_quantize_inputs =
+        schema_params->asymmetric_quantize_inputs();
+
+    switch (schema_params->weights_format()) {
+      case FullyConnectedOptionsWeightsFormat_DEFAULT:
+        params->weights_format = kTfLiteFullyConnectedWeightsFormatDefault;
+        break;
+      case FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8:
+        params->weights_format =
+            kTfLiteFullyConnectedWeightsFormatShuffled4x16Int8;
+        break;
+      default:
+        TF_LITE_REPORT_ERROR(error_reporter,
+                             "Unhandled fully-connected weights format.");
+        return kTfLiteError;
+    }
+  } else {
+    // TODO(b/157480169): We should either return kTfLiteError or fill in some
+    // reasonable defaults in the params struct. We are not doing so until we
+    // better understand the ramifications of changing the legacy behavior.
+  }
+
+  *builtin_data = params.release();
+  return kTfLiteOk;
+}
+
+// We have this parse function instead of directly returning kTfLiteOk from the
+// switch-case in ParseOpData because this function is used as part of the
+// selective registration for the OpResolver implementation in micro.
+TfLiteStatus ParseGreater(const Operator*, ErrorReporter*,
+                          BuiltinDataAllocator*, void**) {
+  return kTfLiteOk;
+}
+
+// We have this parse function instead of directly returning kTfLiteOk from the
+// switch-case in ParseOpData because this function is used as part of the
+// selective registration for the OpResolver implementation in micro.
+TfLiteStatus ParseGreaterEqual(const Operator*, ErrorReporter*,
+                               BuiltinDataAllocator*, void**) {
+  return kTfLiteOk;
+}
+
+// We have this parse function instead of directly returning kTfLiteOk from the
+// switch-case in ParseOpData because this function is used as part of the
+// selective registration for the OpResolver implementation in micro.
+TfLiteStatus ParseHardSwish(const Operator*, ErrorReporter*,
+                            BuiltinDataAllocator*, void**) {
+  return kTfLiteOk;
+}
+
+TfLiteStatus ParseL2Normalization(const Operator* op,
+                                  ErrorReporter* error_reporter,
+                                  BuiltinDataAllocator* allocator,
+                                  void** builtin_data) {
+  CheckParsePointerParams(op, error_reporter, allocator, builtin_data);
+
+  SafeBuiltinDataAllocator safe_allocator(allocator);
+  std::unique_ptr<TfLiteL2NormParams,
+                  SafeBuiltinDataAllocator::BuiltinDataDeleter>
+      params = safe_allocator.Allocate<TfLiteL2NormParams>();
+  TF_LITE_ENSURE(error_reporter, params != nullptr);
+
+  const L2NormOptions* schema_params = op->builtin_options_as_L2NormOptions();
+
+  if (schema_params != nullptr) {
+    params->activation =
+        ConvertActivation(schema_params->fused_activation_function());
+  } else {
+    // TODO(b/157480169): We should either return kTfLiteError or fill in some
+    // reasonable defaults in the params struct. We are not doing so until we
+    // better understand the ramifications of changing the legacy behavior.
+  }
+
+  *builtin_data = params.release();
+  return kTfLiteOk;
+}
+
+// We have this parse function instead of directly returning kTfLiteOk from the
+// switch-case in ParseOpData because this function is used as part of the
+// selective registration for the OpResolver implementation in micro.
+TfLiteStatus ParseLess(const Operator*, ErrorReporter*, BuiltinDataAllocator*,
+                       void**) {
+  return kTfLiteOk;
+}
+
+// We have this parse function instead of directly returning kTfLiteOk from the
+// switch-case in ParseOpData because this function is used as part of the
+// selective registration for the OpResolver implementation in micro.
+TfLiteStatus ParseLessEqual(const Operator*, ErrorReporter*,
+                            BuiltinDataAllocator*, void**) {
+  return kTfLiteOk;
+}
+
+// We have this parse function instead of directly returning kTfLiteOk from the
+// switch-case in ParseOpData because this function is used as part of the
+// selective registration for the OpResolver implementation in micro.
+TfLiteStatus ParseLog(const Operator*, ErrorReporter*, BuiltinDataAllocator*,
+                      void**) {
+  return kTfLiteOk;
+}
+
+// We have this parse function instead of directly returning kTfLiteOk from the
+// switch-case in ParseOpData because this function is used as part of the
+// selective registration for the OpResolver implementation in micro.
+TfLiteStatus ParseLogicalAnd(const Operator*, ErrorReporter*,
+                             BuiltinDataAllocator*, void**) {
+  return kTfLiteOk;
+}
+
+// We have this parse function instead of directly returning kTfLiteOk from the
+// switch-case in ParseOpData because this function is used as part of the
+// selective registration for the OpResolver implementation in micro.
+TfLiteStatus ParseLogicalNot(const Operator*, ErrorReporter*,
+                             BuiltinDataAllocator*, void**) {
+  return kTfLiteOk;
+}
+
+// We have this parse function instead of directly returning kTfLiteOk from the
+// switch-case in ParseOpData because this function is used as part of the
+// selective registration for the OpResolver implementation in micro.
+TfLiteStatus ParseLogicalOr(const Operator*, ErrorReporter*,
+                            BuiltinDataAllocator*, void**) {
+  return kTfLiteOk;
+}
+
+// We have this parse function instead of directly returning kTfLiteOk from the
+// switch-case in ParseOpData because this function is used as part of the
+// selective registration for the OpResolver implementation in micro.
+TfLiteStatus ParseLogistic(const Operator*, ErrorReporter*,
+                           BuiltinDataAllocator*, void**) {
+  return kTfLiteOk;
+}
+
+// We have this parse function instead of directly returning kTfLiteOk from the
+// switch-case in ParseOpData because this function is used as part of the
+// selective registration for the OpResolver implementation in micro.
+TfLiteStatus ParseMaximum(const Operator*, ErrorReporter*,
+                          BuiltinDataAllocator*, void**) {
+  return kTfLiteOk;
+}
+
+// We have this parse function instead of directly returning kTfLiteOk from the
+// switch-case in ParseOpData because this function is used as part of the
+// selective registration for the OpResolver implementation in micro.
+TfLiteStatus ParseMinimum(const Operator*, ErrorReporter*,
+                          BuiltinDataAllocator*, void**) {
+  return kTfLiteOk;
+}
+
+TfLiteStatus ParseMul(const Operator* op, ErrorReporter* error_reporter,
+                      BuiltinDataAllocator* allocator, void** builtin_data) {
+  CheckParsePointerParams(op, error_reporter, allocator, builtin_data);
+
+  SafeBuiltinDataAllocator safe_allocator(allocator);
+  std::unique_ptr<TfLiteMulParams, SafeBuiltinDataAllocator::BuiltinDataDeleter>
+      params = safe_allocator.Allocate<TfLiteMulParams>();
+  TF_LITE_ENSURE(error_reporter, params != nullptr);
+
+  const MulOptions* schema_params = op->builtin_options_as_MulOptions();
+
+  if (schema_params != nullptr) {
+    params->activation =
+        ConvertActivation(schema_params->fused_activation_function());
+  } else {
+    // TODO(b/157480169): We should either return kTfLiteError or fill in some
+    // reasonable defaults in the params struct. We are not doing so until we
+    // better understand the ramifications of changing the legacy behavior.
+  }
+
+  *builtin_data = params.release();
+  return kTfLiteOk;
+}
+
+// We have this parse function instead of directly returning kTfLiteOk from the
+// switch-case in ParseOpData because this function is used as part of the
+// selective registration for the OpResolver implementation in micro.
+TfLiteStatus ParseNeg(const Operator*, ErrorReporter*, BuiltinDataAllocator*,
+                      void**) {
+  return kTfLiteOk;
+}
+
+// We have this parse function instead of directly returning kTfLiteOk from the
+// switch-case in ParseOpData because this function is used as part of the
+// selective registration for the OpResolver implementation in micro.
+TfLiteStatus ParseNotEqual(const Operator*, ErrorReporter*,
+                           BuiltinDataAllocator*, void**) {
+  return kTfLiteOk;
+}
+
+TfLiteStatus ParsePack(const Operator* op, ErrorReporter* error_reporter,
+                       BuiltinDataAllocator* allocator, void** builtin_data) {
+  CheckParsePointerParams(op, error_reporter, allocator, builtin_data);
+
+  SafeBuiltinDataAllocator safe_allocator(allocator);
+  std::unique_ptr<TfLitePackParams,
+                  SafeBuiltinDataAllocator::BuiltinDataDeleter>
+      params = safe_allocator.Allocate<TfLitePackParams>();
+  TF_LITE_ENSURE(error_reporter, params != nullptr);
+
+  const PackOptions* schema_params = op->builtin_options_as_PackOptions();
+
+  if (schema_params != nullptr) {
+    params->values_count = schema_params->values_count();
+    params->axis = schema_params->axis();
+  } else {
+    // TODO(b/157480169): We should either return kTfLiteError or fill in some
+    // reasonable defaults in the params struct. We are not doing so until we
+    // better understand the ramifications of changing the legacy behavior.
+  }
+
+  *builtin_data = params.release();
+  return kTfLiteOk;
+}
+
+// We have this parse function instead of directly returning kTfLiteOk from the
+// switch-case in ParseOpData because this function is used as part of the
+// selective registration for the OpResolver implementation in micro.
+TfLiteStatus ParsePad(const Operator*, ErrorReporter*, BuiltinDataAllocator*,
+                      void**) {
+  return kTfLiteOk;
+}
+
+// We have this parse function instead of directly returning kTfLiteOk from the
+// switch-case in ParseOpData because this function is used as part of the
+// selective registration for the OpResolver implementation in micro.
+TfLiteStatus ParsePadV2(const Operator*, ErrorReporter*, BuiltinDataAllocator*,
+                        void**) {
+  return kTfLiteOk;
+}
+
+TfLiteStatus ParsePool(const Operator* op, ErrorReporter* error_reporter,
+                       BuiltinDataAllocator* allocator, void** builtin_data) {
+  CheckParsePointerParams(op, error_reporter, allocator, builtin_data);
+
+  SafeBuiltinDataAllocator safe_allocator(allocator);
+  std::unique_ptr<TfLitePoolParams,
+                  SafeBuiltinDataAllocator::BuiltinDataDeleter>
+      params = safe_allocator.Allocate<TfLitePoolParams>();
+  TF_LITE_ENSURE(error_reporter, params != nullptr);
+
+  const Pool2DOptions* schema_params = op->builtin_options_as_Pool2DOptions();
+
+  if (schema_params != nullptr) {
+    params->padding = ConvertPadding(schema_params->padding());
+    params->stride_width = schema_params->stride_w();
+    params->stride_height = schema_params->stride_h();
+    params->filter_width = schema_params->filter_width();
+    params->filter_height = schema_params->filter_height();
+    params->activation =
+        ConvertActivation(schema_params->fused_activation_function());
+  } else {
+    // TODO(b/157480169): We should either return kTfLiteError or fill in some
+    // reasonable defaults in the params struct. We are not doing so until we
+    // better understand the ramifications of changing the legacy behavior.
+  }
+
+  *builtin_data = params.release();
+  return kTfLiteOk;
+}
+
+// We have this parse function instead of directly returning kTfLiteOk from the
+// switch-case in ParseOpData because this function is used as part of the
+// selective registration for the OpResolver implementation in micro.
+TfLiteStatus ParsePrelu(const Operator*, ErrorReporter*, BuiltinDataAllocator*,
+                        void**) {
+  return kTfLiteOk;
+}
+
+// We have this parse function instead of directly returning kTfLiteOk from the
+// switch-case in ParseOpData because this function is used as part of the
+// selective registration for the OpResolver implementation in micro.
+TfLiteStatus ParseQuantize(const Operator*, ErrorReporter*,
+                           BuiltinDataAllocator*, void**) {
+  return kTfLiteOk;
+}
+
+TfLiteStatus ParseReducer(const Operator* op, ErrorReporter* error_reporter,
+                          BuiltinDataAllocator* allocator,
+                          void** builtin_data) {
+  CheckParsePointerParams(op, error_reporter, allocator, builtin_data);
+
+  SafeBuiltinDataAllocator safe_allocator(allocator);
+
+  std::unique_ptr<TfLiteReducerParams,
+                  SafeBuiltinDataAllocator::BuiltinDataDeleter>
+      params = safe_allocator.Allocate<TfLiteReducerParams>();
+  TF_LITE_ENSURE(error_reporter, params != nullptr);
+
+  const ReducerOptions* schema_params = op->builtin_options_as_ReducerOptions();
+
+  if (schema_params != nullptr) {
+    params->keep_dims = schema_params->keep_dims();
+  } else {
+    // TODO(b/157480169): We should either return kTfLiteError or fill in some
+    // reasonable defaults in the params struct. We are not doing so until we
+    // better understand the ramifications of changing the legacy behavior.
+  }
+
+  *builtin_data = params.release();
+  return kTfLiteOk;
+}
+
+// We have this parse function instead of directly returning kTfLiteOk from the
+// switch-case in ParseOpData because this function is used as part of the
+// selective registration for the OpResolver implementation in micro.
+TfLiteStatus ParseRelu(const Operator*, ErrorReporter*, BuiltinDataAllocator*,
+                       void**) {
+  return kTfLiteOk;
+}
+
+// We have this parse function instead of directly returning kTfLiteOk from the
+// switch-case in ParseOpData because this function is used as part of the
+// selective registration for the OpResolver implementation in micro.
+TfLiteStatus ParseRelu6(const Operator*, ErrorReporter*, BuiltinDataAllocator*,
+                        void**) {
+  return kTfLiteOk;
+}
+
+TfLiteStatus ParseReshape(const Operator* op, ErrorReporter* error_reporter,
+                          BuiltinDataAllocator* allocator,
+                          void** builtin_data) {
+  CheckParsePointerParams(op, error_reporter, allocator, builtin_data);
+
+  SafeBuiltinDataAllocator safe_allocator(allocator);
+
+  std::unique_ptr<TfLiteReshapeParams,
+                  SafeBuiltinDataAllocator::BuiltinDataDeleter>
+      params = safe_allocator.Allocate<TfLiteReshapeParams>();
+  TF_LITE_ENSURE(error_reporter, params != nullptr);
+
+  const ReshapeOptions* schema_params = op->builtin_options_as_ReshapeOptions();
+
+  if (schema_params != nullptr) {
+    const flatbuffers::Vector<int32_t>* new_shape = schema_params->new_shape();
+    // TODO(b/147203660): We need to figure out why, when dynamic reshape
+    // (new_shape is a tensor) happens, the option is not a nullptr.
+    // Nonetheless, we should only copy when new_shape is not a nullptr.
+    if (new_shape != nullptr) {
+      TF_LITE_ENSURE_STATUS(
+          FlatBufferIntVectorToArray(sizeof(params->shape), new_shape,
+                                     params->shape, error_reporter, "reshape"));
+      params->num_dimensions = new_shape->size();
+    } else {
+      // TODO(b/157480169) TODO(b/147203660): We should either return
+      // kTfLiteError or fill in some reasonable defaults in the params struct.
+      // We are not doing so until we better understand the ramifications of
+      // changing the legacy behavior.
+    }
+  } else {
+    // TODO(b/157480169): We should either return kTfLiteError or fill in some
+    // reasonable defaults in the params struct. We are not doing so until we
+    // better understand the ramifications of changing the legacy behavior.
+  }
+
+  *builtin_data = params.release();
+  return kTfLiteOk;
+}
+
+TfLiteStatus ParseResizeNearestNeighbor(const Operator* op,
+                                        ErrorReporter* error_reporter,
+                                        BuiltinDataAllocator* allocator,
+                                        void** builtin_data) {
+  CheckParsePointerParams(op, error_reporter, allocator, builtin_data);
+
+  SafeBuiltinDataAllocator safe_allocator(allocator);
+  std::unique_ptr<TfLiteResizeNearestNeighborParams,
+                  SafeBuiltinDataAllocator::BuiltinDataDeleter>
+      params = safe_allocator.Allocate<TfLiteResizeNearestNeighborParams>();
+  TF_LITE_ENSURE(error_reporter, params != nullptr);
+
+  const ResizeNearestNeighborOptions* schema_params =
+      op->builtin_options_as_ResizeNearestNeighborOptions();
+
+  if (schema_params != nullptr) {
+    params->align_corners = schema_params->align_corners();
+    params->half_pixel_centers = schema_params->half_pixel_centers();
+  } else {
+    params->align_corners = false;
+    params->half_pixel_centers = false;
+  }
+
+  *builtin_data = params.release();
+  return kTfLiteOk;
+}
+
+// We have this parse function instead of directly returning kTfLiteOk from the
+// switch-case in ParseOpData because this function is used as part of the
+// selective registration for the OpResolver implementation in micro.
+TfLiteStatus ParseRound(const Operator*, ErrorReporter*, BuiltinDataAllocator*,
+                        void**) {
+  return kTfLiteOk;
+}
+
+// We have this parse function instead of directly returning kTfLiteOk from the
+// switch-case in ParseOpData because this function is used as part of the
+// selective registration for the OpResolver implementation in micro.
+TfLiteStatus ParseRsqrt(const Operator*, ErrorReporter*, BuiltinDataAllocator*,
+                        void**) {
+  return kTfLiteOk;
+}
+
+// We have this parse function instead of directly returning kTfLiteOk from the
+// switch-case in ParseOpData because this function is used as part of the
+// selective registration for the OpResolver implementation in micro.
+TfLiteStatus ParseSin(const Operator*, ErrorReporter*, BuiltinDataAllocator*,
+                      void**) {
+  return kTfLiteOk;
+}
+
+TfLiteStatus ParseSoftmax(const Operator* op, ErrorReporter* error_reporter,
+                          BuiltinDataAllocator* allocator,
+                          void** builtin_data) {
+  CheckParsePointerParams(op, error_reporter, allocator, builtin_data);
+
+  SafeBuiltinDataAllocator safe_allocator(allocator);
+  std::unique_ptr<TfLiteSoftmaxParams,
+                  SafeBuiltinDataAllocator::BuiltinDataDeleter>
+      params = safe_allocator.Allocate<TfLiteSoftmaxParams>();
+  TF_LITE_ENSURE(error_reporter, params != nullptr);
+
+  const SoftmaxOptions* schema_params = op->builtin_options_as_SoftmaxOptions();
+
+  if (schema_params != nullptr) {
+    params->beta = schema_params->beta();
+  } else {
+    // TODO(b/157480169): We should either return kTfLiteError or fill in some
+    // reasonable defaults in the params struct. We are not doing so until we
+    // better understand the ramifications of changing the legacy behavior.
+  }
+
+  *builtin_data = params.release();
+  return kTfLiteOk;
+}
+
+TfLiteStatus ParseSplit(const Operator* op, ErrorReporter* error_reporter,
+                        BuiltinDataAllocator* allocator, void** builtin_data) {
+  CheckParsePointerParams(op, error_reporter, allocator, builtin_data);
+
+  SafeBuiltinDataAllocator safe_allocator(allocator);
+  std::unique_ptr<TfLiteSplitParams,
+                  SafeBuiltinDataAllocator::BuiltinDataDeleter>
+      params = safe_allocator.Allocate<TfLiteSplitParams>();
+  TF_LITE_ENSURE(error_reporter, params != nullptr);
+
+  const SplitOptions* schema_params = op->builtin_options_as_SplitOptions();
+
+  if (schema_params != nullptr) {
+    params->num_splits = schema_params->num_splits();
+  } else {
+    // TODO(b/157480169): We should either return kTfLiteError or fill in some
+    // reasonable defaults in the params struct. We are not doing so until we
+    // better understand the ramifications of changing the legacy behavior.
+  }
+
+  *builtin_data = params.release();
+  return kTfLiteOk;
+}
+
+// We have this parse function instead of directly returning kTfLiteOk from the
+// switch-case in ParseOpData because this function is used as part of the
+// selective registration for the OpResolver implementation in micro.
+TfLiteStatus ParseSqrt(const Operator*, ErrorReporter*, BuiltinDataAllocator*,
+                       void**) {
+  return kTfLiteOk;
+}
+
+// We have this parse function instead of directly returning kTfLiteOk from the
+// switch-case in ParseOpData because this function is used as part of the
+// selective registration for the OpResolver implementation in micro.
+TfLiteStatus ParseSquare(const Operator*, ErrorReporter*, BuiltinDataAllocator*,
+                         void**) {
+  return kTfLiteOk;
+}
+
+TfLiteStatus ParseStridedSlice(const Operator* op,
+                               ErrorReporter* error_reporter,
+                               BuiltinDataAllocator* allocator,
+                               void** builtin_data) {
+  CheckParsePointerParams(op, error_reporter, allocator, builtin_data);
+
+  SafeBuiltinDataAllocator safe_allocator(allocator);
+  std::unique_ptr<TfLiteStridedSliceParams,
+                  SafeBuiltinDataAllocator::BuiltinDataDeleter>
+      params = safe_allocator.Allocate<TfLiteStridedSliceParams>();
+  TF_LITE_ENSURE(error_reporter, params != nullptr);
+
+  const StridedSliceOptions* schema_params =
+      op->builtin_options_as_StridedSliceOptions();
+
+  if (schema_params != nullptr) {
+    params->begin_mask = schema_params->begin_mask();
+    params->end_mask = schema_params->end_mask();
+    params->ellipsis_mask = schema_params->ellipsis_mask();
+    params->new_axis_mask = schema_params->new_axis_mask();
+    params->shrink_axis_mask = schema_params->shrink_axis_mask();
+  } else {
+    // TODO(b/157480169): We should either return kTfLiteError or fill in some
+    // reasonable defaults in the params struct. We are not doing so until we
+    // better understand the ramifications of changing the legacy behavior.
+  }
+
+  *builtin_data = params.release();
+  return kTfLiteOk;
+}
+
+TfLiteStatus ParseSub(const Operator* op, ErrorReporter* error_reporter,
+                      BuiltinDataAllocator* allocator, void** builtin_data) {
+  CheckParsePointerParams(op, error_reporter, allocator, builtin_data);
+
+  SafeBuiltinDataAllocator safe_allocator(allocator);
+  std::unique_ptr<TfLiteSubParams, SafeBuiltinDataAllocator::BuiltinDataDeleter>
+      params = safe_allocator.Allocate<TfLiteSubParams>();
+  TF_LITE_ENSURE(error_reporter, params != nullptr);
+
+  const SubOptions* schema_params = op->builtin_options_as_SubOptions();
+
+  if (schema_params != nullptr) {
+    params->activation =
+        ConvertActivation(schema_params->fused_activation_function());
+    params->pot_scale_int16 = schema_params->pot_scale_int16();
+  } else {
+    // TODO(b/157480169): We should either return kTfLiteError or fill in some
+    // reasonable defaults in the params struct. We are not doing so until we
+    // better understand the ramifications of changing the legacy behavior.
+  }
+
+  *builtin_data = params.release();
+  return kTfLiteOk;
+}
+
+TfLiteStatus ParseSvdf(const Operator* op, ErrorReporter* error_reporter,
+                       BuiltinDataAllocator* allocator, void** builtin_data) {
+  CheckParsePointerParams(op, error_reporter, allocator, builtin_data);
+
+  SafeBuiltinDataAllocator safe_allocator(allocator);
+  std::unique_ptr<TfLiteSVDFParams,
+                  SafeBuiltinDataAllocator::BuiltinDataDeleter>
+      params = safe_allocator.Allocate<TfLiteSVDFParams>();
+  TF_LITE_ENSURE(error_reporter, params != nullptr);
+
+  const SVDFOptions* schema_params = op->builtin_options_as_SVDFOptions();
+  if (schema_params != nullptr) {
+    params->rank = schema_params->rank();
+    params->activation =
+        ConvertActivation(schema_params->fused_activation_function());
+    params->asymmetric_quantize_inputs =
+        schema_params->asymmetric_quantize_inputs();
+  } else {
+    // TODO(b/157480169): We should either return kTfLiteError or fill in some
+    // reasonable defaults in the params struct. We are not doing so until we
+    // better understand the ramifications of changing the legacy behavior.
+  }
+
+  *builtin_data = params.release();
+  return kTfLiteOk;
+}
+
+// We have this parse function instead of directly returning kTfLiteOk from the
+// switch-case in ParseOpData because this function is used as part of the
+// selective registration for the OpResolver implementation in micro.
+TfLiteStatus ParseTanh(const Operator*, ErrorReporter*, BuiltinDataAllocator*,
+                       void**) {
+  return kTfLiteOk;
+}
+
+TfLiteStatus ParseUnpack(const Operator* op, ErrorReporter* error_reporter,
+                         BuiltinDataAllocator* allocator, void** builtin_data) {
+  CheckParsePointerParams(op, error_reporter, allocator, builtin_data);
+
+  SafeBuiltinDataAllocator safe_allocator(allocator);
+  std::unique_ptr<TfLiteUnpackParams,
+                  SafeBuiltinDataAllocator::BuiltinDataDeleter>
+      params = safe_allocator.Allocate<TfLiteUnpackParams>();
+  TF_LITE_ENSURE(error_reporter, params != nullptr);
+
+  const UnpackOptions* schema_params = op->builtin_options_as_UnpackOptions();
+
+  if (schema_params != nullptr) {
+    params->num = schema_params->num();
+    params->axis = schema_params->axis();
+  } else {
+    // TODO(b/157480169): We should either return kTfLiteError or fill in some
+    // reasonable defaults in the params struct. We are not doing so until we
+    // better understand the ramifications of changing the legacy behavior.
+  }
+
+  *builtin_data = params.release();
+  return kTfLiteOk;
+}
+
+TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
+                         ErrorReporter* error_reporter,
+                         BuiltinDataAllocator* allocator, void** builtin_data) {
+// TODO(b/145762662): It would be preferable to have the build graph for TF Lite
+// Micro not have the ParseOpData function at all. This would require splitting
+// the current file into two separate files, one of which defines the
+// ParseOpData function and the other that defines the operator specific parse
+// functions (e.g. ParseAdd).
+//
+// Such a split was attempted but was not worth the effort at the time because
+// of the following reasons:
+//  * We could either duplicate the functions and the SafeBuiltinDataAllocator
+//    class in the anonymous namespace of this file, or attempt to make a common
+//    library with these helper functions and class.
+//  * Making a common library with a separate build target was not feasible,
+//    as it introduced circular dependencies due to the ErrorReporter, and a
+//    common .cc and .h within the same api build target also caused circular
+//    dependencies due to the BuiltinDataAllocator class.
+//  * If all the builtin operators were to have their own parse functions, or we
+//    were ok with some amount of code duplication, then this split of the .cc
+//    files would be a lot more feasible.
+#ifdef TF_LITE_STATIC_MEMORY
+  TF_LITE_REPORT_ERROR(
+      error_reporter,
+      "ParseOpData is unsupported on TfLiteMicro, please use the operator "
+      "specific parse functions (e.g. ParseAdd etc.).\n");
+  return kTfLiteError;
+#else
+  return ParseOpDataTfLite(op, op_type, error_reporter, allocator,
+                           builtin_data);
+#endif
+}
+
+}  // namespace tflite
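
The parse functions above all share one contract: the caller supplies a BuiltinDataAllocator, and on kTfLiteOk receives a populated params struct through builtin_data that must later be released through the same allocator. Below is a minimal sketch of that contract, assuming only the headers added in this commit; the class name HeapBuiltinDataAllocator and its malloc/free strategy are illustrative, not part of the diff.

#include <cstdlib>

#include "tensorflow/lite/core/api/flatbuffer_conversions.h"

// A heap-backed BuiltinDataAllocator for host-side testing (sketch only).
class HeapBuiltinDataAllocator : public tflite::BuiltinDataAllocator {
 public:
  void* Allocate(size_t size, size_t alignment_hint) override {
    (void)alignment_hint;  // malloc alignment suffices for these POD structs.
    return std::malloc(size);
  }
  void Deallocate(void* data) override { std::free(data); }
};

// Usage, given an `op` and `error_reporter` from the surrounding model code:
//   HeapBuiltinDataAllocator allocator;
//   void* builtin_data = nullptr;
//   if (tflite::ParseConv2D(op, error_reporter, &allocator, &builtin_data) ==
//       kTfLiteOk) {
//     auto* params = static_cast<TfLiteConvParams*>(builtin_data);
//     // ... read params->stride_width, params->padding, etc. ...
//     allocator.Deallocate(builtin_data);
//   }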

+ 253 - 0
tensorflow/lite/micro/tensorflow/lite/core/api/flatbuffer_conversions.h

@@ -0,0 +1,253 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_CORE_API_FLATBUFFER_CONVERSIONS_H_
+#define TENSORFLOW_LITE_CORE_API_FLATBUFFER_CONVERSIONS_H_
+
+// These functions transform codes and data structures that are defined in the
+// flatbuffer serialization format into in-memory values that are used by the
+// runtime API and interpreter.
+
+#include <cstddef>
+#include <new>
+#include <type_traits>
+
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/core/api/error_reporter.h"
+#include "tensorflow/lite/schema/schema_generated.h"
+
+namespace tflite {
+
+// Interface class for builtin data allocations.
+class BuiltinDataAllocator {
+ public:
+  virtual void* Allocate(size_t size, size_t alignment_hint) = 0;
+  virtual void Deallocate(void* data) = 0;
+
+  // Allocate a structure, but make sure it is a POD structure that doesn't
+  // require constructors to run. The reason we do this is that Interpreter's
+  // C extension part will take ownership, so destructors will not be run
+  // during deallocation.
+  template <typename T>
+  T* AllocatePOD() {
+    // TODO(b/154346074): Change this to is_trivially_destructible when all
+    // platform targets support that properly.
+    static_assert(std::is_pod<T>::value, "Builtin data structure must be POD.");
+    void* allocated_memory = this->Allocate(sizeof(T), alignof(T));
+    return new (allocated_memory) T;
+  }
+
+  virtual ~BuiltinDataAllocator() {}
+};
+
+// Parse the appropriate data out of the op.
+//
+// This handles builtin data explicitly, since builtin ops have flatbuffer
+// schemas.
+// If it returns kTfLiteOk, it passes the data out with `builtin_data`. The
+// calling function has to pass in an allocator object, and this allocator
+// will be called to reserve space for the output data. If the calling
+// function's allocator reserves memory on the heap, then it's the calling
+// function's responsibility to free it.
+// If it returns kTfLiteError, `builtin_data` will be `nullptr`.
+TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
+                         ErrorReporter* error_reporter,
+                         BuiltinDataAllocator* allocator, void** builtin_data);
+
+// Converts the tensor data type used in the flat buffer to the representation
+// used by the runtime.
+TfLiteStatus ConvertTensorType(TensorType tensor_type, TfLiteType* type,
+                               ErrorReporter* error_reporter);
+
+TfLiteStatus ParseAbs(const Operator* op, ErrorReporter* error_reporter,
+                      BuiltinDataAllocator* allocator, void** builtin_data);
+
+TfLiteStatus ParseAdd(const Operator* op, ErrorReporter* error_reporter,
+                      BuiltinDataAllocator* allocator, void** builtin_data);
+
+TfLiteStatus ParseArgMax(const Operator* op, ErrorReporter* error_reporter,
+                         BuiltinDataAllocator* allocator, void** builtin_data);
+
+TfLiteStatus ParseArgMin(const Operator* op, ErrorReporter* error_reporter,
+                         BuiltinDataAllocator* allocator, void** builtin_data);
+
+TfLiteStatus ParseCeil(const Operator* op, ErrorReporter* error_reporter,
+                       BuiltinDataAllocator* allocator, void** builtin_data);
+
+TfLiteStatus ParseConcatenation(const Operator* op,
+                                ErrorReporter* error_reporter,
+                                BuiltinDataAllocator* allocator,
+                                void** builtin_data);
+
+TfLiteStatus ParseConv2D(const Operator* op, ErrorReporter* error_reporter,
+                         BuiltinDataAllocator* allocator, void** builtin_data);
+
+TfLiteStatus ParseCos(const Operator* op, ErrorReporter* error_reporter,
+                      BuiltinDataAllocator* allocator, void** builtin_data);
+
+TfLiteStatus ParseDepthwiseConv2D(const Operator* op,
+                                  ErrorReporter* error_reporter,
+                                  BuiltinDataAllocator* allocator,
+                                  void** builtin_data);
+
+TfLiteStatus ParseDequantize(const Operator* op, ErrorReporter* error_reporter,
+                             BuiltinDataAllocator* allocator,
+                             void** builtin_data);
+
+TfLiteStatus ParseEqual(const Operator* op, ErrorReporter* error_reporter,
+                        BuiltinDataAllocator* allocator, void** builtin_data);
+
+TfLiteStatus ParseFloor(const Operator* op, ErrorReporter* error_reporter,
+                        BuiltinDataAllocator* allocator, void** builtin_data);
+
+TfLiteStatus ParseFullyConnected(const Operator* op,
+                                 ErrorReporter* error_reporter,
+                                 BuiltinDataAllocator* allocator,
+                                 void** builtin_data);
+
+TfLiteStatus ParseGreater(const Operator* op, ErrorReporter* error_reporter,
+                          BuiltinDataAllocator* allocator, void** builtin_data);
+
+TfLiteStatus ParseGreaterEqual(const Operator* op,
+                               ErrorReporter* error_reporter,
+                               BuiltinDataAllocator* allocator,
+                               void** builtin_data);
+
+TfLiteStatus ParseHardSwish(const Operator* op, ErrorReporter* error_reporter,
+                            BuiltinDataAllocator* allocator,
+                            void** builtin_data);
+
+TfLiteStatus ParseL2Normalization(const Operator* op,
+                                  ErrorReporter* error_reporter,
+                                  BuiltinDataAllocator* allocator,
+                                  void** builtin_data);
+
+TfLiteStatus ParseLess(const Operator* op, ErrorReporter* error_reporter,
+                       BuiltinDataAllocator* allocator, void** builtin_data);
+
+TfLiteStatus ParseLessEqual(const Operator* op, ErrorReporter* error_reporter,
+                            BuiltinDataAllocator* allocator,
+                            void** builtin_data);
+
+TfLiteStatus ParseLog(const Operator* op, ErrorReporter* error_reporter,
+                      BuiltinDataAllocator* allocator, void** builtin_data);
+
+TfLiteStatus ParseLogicalAnd(const Operator* op, ErrorReporter* error_reporter,
+                             BuiltinDataAllocator* allocator,
+                             void** builtin_data);
+
+TfLiteStatus ParseLogicalNot(const Operator* op, ErrorReporter* error_reporter,
+                             BuiltinDataAllocator* allocator,
+                             void** builtin_data);
+
+TfLiteStatus ParseLogicalOr(const Operator* op, ErrorReporter* error_reporter,
+                            BuiltinDataAllocator* allocator,
+                            void** builtin_data);
+
+TfLiteStatus ParseLogistic(const Operator* op, ErrorReporter* error_reporter,
+                           BuiltinDataAllocator* allocator,
+                           void** builtin_data);
+
+TfLiteStatus ParseMaximum(const Operator* op, ErrorReporter* error_reporter,
+                          BuiltinDataAllocator* allocator, void** builtin_data);
+
+TfLiteStatus ParseMinimum(const Operator* op, ErrorReporter* error_reporter,
+                          BuiltinDataAllocator* allocator, void** builtin_data);
+
+TfLiteStatus ParseMul(const Operator* op, ErrorReporter* error_reporter,
+                      BuiltinDataAllocator* allocator, void** builtin_data);
+
+TfLiteStatus ParseNeg(const Operator* op, ErrorReporter* error_reporter,
+                      BuiltinDataAllocator* allocator, void** builtin_data);
+
+TfLiteStatus ParseNotEqual(const Operator* op, ErrorReporter* error_reporter,
+                           BuiltinDataAllocator* allocator,
+                           void** builtin_data);
+
+TfLiteStatus ParsePack(const Operator* op, ErrorReporter* error_reporter,
+                       BuiltinDataAllocator* allocator, void** builtin_data);
+
+TfLiteStatus ParsePad(const Operator* op, ErrorReporter* error_reporter,
+                      BuiltinDataAllocator* allocator, void** builtin_data);
+
+TfLiteStatus ParsePadV2(const Operator* op, ErrorReporter* error_reporter,
+                        BuiltinDataAllocator* allocator, void** builtin_data);
+
+TfLiteStatus ParsePool(const Operator* op, ErrorReporter* error_reporter,
+                       BuiltinDataAllocator* allocator, void** builtin_data);
+
+TfLiteStatus ParsePrelu(const Operator* op, ErrorReporter* error_reporter,
+                        BuiltinDataAllocator* allocator, void** builtin_data);
+
+TfLiteStatus ParseQuantize(const Operator* op, ErrorReporter* error_reporter,
+                           BuiltinDataAllocator* allocator,
+                           void** builtin_data);
+
+TfLiteStatus ParseReducer(const Operator* op, ErrorReporter* error_reporter,
+                          BuiltinDataAllocator* allocator, void** builtin_data);
+
+TfLiteStatus ParseRelu(const Operator* op, ErrorReporter* error_reporter,
+                       BuiltinDataAllocator* allocator, void** builtin_data);
+
+TfLiteStatus ParseRelu6(const Operator* op, ErrorReporter* error_reporter,
+                        BuiltinDataAllocator* allocator, void** builtin_data);
+
+TfLiteStatus ParseReshape(const Operator* op, ErrorReporter* error_reporter,
+                          BuiltinDataAllocator* allocator, void** builtin_data);
+
+TfLiteStatus ParseResizeNearestNeighbor(const Operator* op,
+                                        ErrorReporter* error_reporter,
+                                        BuiltinDataAllocator* allocator,
+                                        void** builtin_data);
+
+TfLiteStatus ParseRound(const Operator* op, ErrorReporter* error_reporter,
+                        BuiltinDataAllocator* allocator, void** builtin_data);
+
+TfLiteStatus ParseRsqrt(const Operator* op, ErrorReporter* error_reporter,
+                        BuiltinDataAllocator* allocator, void** builtin_data);
+
+TfLiteStatus ParseSin(const Operator* op, ErrorReporter* error_reporter,
+                      BuiltinDataAllocator* allocator, void** builtin_data);
+
+TfLiteStatus ParseSoftmax(const Operator* op, ErrorReporter* error_reporter,
+                          BuiltinDataAllocator* allocator, void** builtin_data);
+
+TfLiteStatus ParseSplit(const Operator* op, ErrorReporter* error_reporter,
+                        BuiltinDataAllocator* allocator, void** builtin_data);
+
+TfLiteStatus ParseSqrt(const Operator* op, ErrorReporter* error_reporter,
+                       BuiltinDataAllocator* allocator, void** builtin_data);
+
+TfLiteStatus ParseSquare(const Operator* op, ErrorReporter* error_reporter,
+                         BuiltinDataAllocator* allocator, void** builtin_data);
+
+TfLiteStatus ParseStridedSlice(const Operator* op,
+                               ErrorReporter* error_reporter,
+                               BuiltinDataAllocator* allocator,
+                               void** builtin_data);
+
+TfLiteStatus ParseSub(const Operator* op, ErrorReporter* error_reporter,
+                      BuiltinDataAllocator* allocator, void** builtin_data);
+
+TfLiteStatus ParseSvdf(const Operator* op, ErrorReporter* error_reporter,
+                       BuiltinDataAllocator* allocator, void** builtin_data);
+
+TfLiteStatus ParseTanh(const Operator* op, ErrorReporter* error_reporter,
+                       BuiltinDataAllocator* allocator, void** builtin_data);
+
+TfLiteStatus ParseUnpack(const Operator* op, ErrorReporter* error_reporter,
+                         BuiltinDataAllocator* allocator, void** builtin_data);
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_CORE_API_FLATBUFFER_CONVERSIONS_H_
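
ConvertTensorType is the one conversion here with a total error path: every enum value outside the listed cases maps to kTfLiteNoType plus kTfLiteError. A short usage sketch follows; the `reporter` pointer is assumed to come from the caller and is not defined in this commit.

#include "tensorflow/lite/core/api/flatbuffer_conversions.h"

// Sketch: map a flatbuffer TensorType to the runtime TfLiteType.
TfLiteStatus TypeOfInt8Tensor(tflite::ErrorReporter* reporter,
                              TfLiteType* out_type) {
  // For TensorType_INT8 this sets *out_type to kTfLiteInt8 and returns
  // kTfLiteOk; unlisted enum values yield kTfLiteNoType and kTfLiteError.
  return tflite::ConvertTensorType(tflite::TensorType_INT8, out_type,
                                   reporter);
}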

+ 66 - 0
tensorflow/lite/micro/tensorflow/lite/core/api/op_resolver.cc

@@ -0,0 +1,66 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/core/api/op_resolver.h"
+
+#include "flatbuffers/flatbuffers.h"  // from @flatbuffers
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/core/api/error_reporter.h"
+
+namespace tflite {
+
+TfLiteStatus GetRegistrationFromOpCode(
+    const OperatorCode* opcode, const OpResolver& op_resolver,
+    ErrorReporter* error_reporter, const TfLiteRegistration** registration) {
+  TfLiteStatus status = kTfLiteOk;
+  *registration = nullptr;
+  auto builtin_code = opcode->builtin_code();
+  int version = opcode->version();
+
+  if (builtin_code > BuiltinOperator_MAX ||
+      builtin_code < BuiltinOperator_MIN) {
+    TF_LITE_REPORT_ERROR(
+        error_reporter,
+        "Op builtin_code out of range: %d. Are you using old TFLite binary "
+        "with newer model?",
+        builtin_code);
+    status = kTfLiteError;
+  } else if (builtin_code != BuiltinOperator_CUSTOM) {
+    *registration = op_resolver.FindOp(builtin_code, version);
+    if (*registration == nullptr) {
+      TF_LITE_REPORT_ERROR(
+          error_reporter,
+          "Didn't find op for builtin opcode '%s' version '%d'\n",
+          EnumNameBuiltinOperator(builtin_code), version);
+      status = kTfLiteError;
+    }
+  } else if (!opcode->custom_code()) {
+    TF_LITE_REPORT_ERROR(
+        error_reporter,
+        "Operator with CUSTOM builtin_code has no custom_code.\n");
+    status = kTfLiteError;
+  } else {
+    const char* name = opcode->custom_code()->c_str();
+    *registration = op_resolver.FindOp(name, version);
+    if (*registration == nullptr) {
+      // Do not report an error for an unresolved custom op; we do the final
+      // check while preparing ops.
+      status = kTfLiteError;
+    }
+  }
+  return status;
+}
+
+}  // namespace tflite
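
A sketch of how a caller might drive GetRegistrationFromOpCode over a model's operator codes; the `model`, `resolver`, and `reporter` objects are assumed to come from the surrounding interpreter code and are not defined in this commit.

#include "tensorflow/lite/core/api/op_resolver.h"

// Sketch: resolve every operator code in a loaded flatbuffer model.
TfLiteStatus ResolveAllOps(const tflite::Model* model,
                           const tflite::OpResolver& resolver,
                           tflite::ErrorReporter* reporter) {
  for (const tflite::OperatorCode* opcode : *model->operator_codes()) {
    const TfLiteRegistration* registration = nullptr;
    if (tflite::GetRegistrationFromOpCode(opcode, resolver, reporter,
                                          &registration) != kTfLiteOk) {
      // Builtin out of range, builtin not registered, or custom op missing.
      return kTfLiteError;
    }
  }
  return kTfLiteOk;
}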

+ 48 - 0
tensorflow/lite/micro/tensorflow/lite/core/api/op_resolver.h

@@ -0,0 +1,48 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_CORE_API_OP_RESOLVER_H_
+#define TENSORFLOW_LITE_CORE_API_OP_RESOLVER_H_
+
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/core/api/error_reporter.h"
+#include "tensorflow/lite/schema/schema_generated.h"
+
+namespace tflite {
+
+/// Abstract interface that returns TfLiteRegistrations given op codes or
+/// custom op names. This is the mechanism by which ops referenced in the
+/// flatbuffer model are mapped to executable function pointers
+/// (TfLiteRegistrations).
+class OpResolver {
+ public:
+  /// Finds the op registration for a builtin operator by enum code.
+  virtual const TfLiteRegistration* FindOp(tflite::BuiltinOperator op,
+                                           int version) const = 0;
+  /// Finds the op registration of a custom operator by op name.
+  virtual const TfLiteRegistration* FindOp(const char* op,
+                                           int version) const = 0;
+  virtual ~OpResolver() {}
+};
+
+// Handles the logic for converting between an OperatorCode structure extracted
+// from a flatbuffer and information about a registered operator
+// implementation.
+TfLiteStatus GetRegistrationFromOpCode(const OperatorCode* opcode,
+                                       const OpResolver& op_resolver,
+                                       ErrorReporter* error_reporter,
+                                       const TfLiteRegistration** registration);
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_CORE_API_OP_RESOLVER_H_
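
A minimal sketch of how an OpResolver is typically implemented, assuming a small fixed lookup table; `MyOpResolver` and `kFullyConnectedRegistration` are hypothetical names, not part of this commit:

    #include "tensorflow/lite/core/api/op_resolver.h"

    namespace tflite {

    // Hypothetical registration assumed to be defined elsewhere.
    extern const TfLiteRegistration kFullyConnectedRegistration;

    class MyOpResolver : public OpResolver {
     public:
      const TfLiteRegistration* FindOp(tflite::BuiltinOperator op,
                                       int version) const override {
        // Builtin lookup: return nullptr for unknown op/version pairs so
        // GetRegistrationFromOpCode can report the failure.
        if (op == BuiltinOperator_FULLY_CONNECTED && version == 1) {
          return &kFullyConnectedRegistration;
        }
        return nullptr;
      }
      const TfLiteRegistration* FindOp(const char* op,
                                       int version) const override {
        return nullptr;  // This sketch registers no custom ops.
      }
    };

    }  // namespace tflite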

+ 194 - 0
tensorflow/lite/micro/tensorflow/lite/core/api/profiler.h

@@ -0,0 +1,194 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_CORE_API_PROFILER_H_
+#define TENSORFLOW_LITE_CORE_API_PROFILER_H_
+
+#include <cstdint>
+
+namespace tflite {
+
+// A simple utility for enabling profiled event tracing in TensorFlow Lite.
+class Profiler {
+ public:
+  // As a given Profiler instance might only be interested in certain event
+  // types, we define each event type value so that a Profiler can use
+  // bitwise masking to determine whether an event should be recorded or
+  // not.
+  enum class EventType {
+    // Default event type, the metadata field has no special significance.
+    DEFAULT = 1,
+
+    // The event is an operator invocation and the event_metadata field is
+    // the index of the operator node.
+    OPERATOR_INVOKE_EVENT = 2,
+
+    // The event is an invocation of an internal operator of a TFLite
+    // delegate. The event_metadata field is the index of the operator node
+    // that is specific to the delegate.
+    DELEGATE_OPERATOR_INVOKE_EVENT = 4,
+
+    // The event is a recording of runtime instrumentation such as the
+    // overall TFLite runtime status, the TFLite delegate status (if a
+    // delegate is applied), and the overall model inference latency, etc.
+    // Note, the delegate status and overall status are stored as separate
+    // event_metadata fields. In particular, the delegate status is encoded
+    // as DelegateStatus::full_status().
+    GENERAL_RUNTIME_INSTRUMENTATION_EVENT = 8,
+  };
+
+  virtual ~Profiler() {}
+
+  // Signals the beginning of an event and returns a handle to the profile
+  // event. The `event_metadata1` and `event_metadata2` have different
+  // interpretations based on the actual Profiler instance and the `event_type`.
+  // For example, as for the 'SubgraphAwareProfiler' defined in
+  // lite/core/subgraph.h, when the event_type is OPERATOR_INVOKE_EVENT,
+  // `event_metadata1` represents the index of a TFLite node, and
+  // `event_metadata2` represents the index of the subgraph that this event
+  // comes from.
+  virtual uint32_t BeginEvent(const char* tag, EventType event_type,
+                              int64_t event_metadata1,
+                              int64_t event_metadata2) = 0;
+  // Similar to the above, but `event_metadata2` defaults to 0.
+  uint32_t BeginEvent(const char* tag, EventType event_type,
+                      int64_t event_metadata) {
+    return BeginEvent(tag, event_type, event_metadata, /*event_metadata2*/ 0);
+  }
+
+  // Signals an end to the specified profile event with 'event_metadata's.
+  // This is useful when 'event_metadata's are not available when the event
+  // begins, or when one wants to overwrite the 'event_metadata's set at the
+  // beginning.
+  virtual void EndEvent(uint32_t event_handle, int64_t event_metadata1,
+                        int64_t event_metadata2) {}
+  // Signals an end to the specified profile event.
+  virtual void EndEvent(uint32_t event_handle) = 0;
+
+  // Appends an event of type 'event_type' with 'tag' and 'event_metadata'
+  // which started at 'start' and ended at 'end'.
+  // Note:
+  // In cases where ProfileSummarizer and tensorflow::StatsCalculator are
+  // used, they assume the value is in "usec"; if a subclass does not report
+  // usec, the values are not meaningful.
+  // TODO(karimnosseir): Revisit and make the function more clear.
+  void AddEvent(const char* tag, EventType event_type, uint64_t start,
+                uint64_t end, int64_t event_metadata) {
+    AddEvent(tag, event_type, start, end, event_metadata,
+             /*event_metadata2*/ 0);
+  }
+
+  virtual void AddEvent(const char* tag, EventType event_type, uint64_t start,
+                        uint64_t end, int64_t event_metadata1,
+                        int64_t event_metadata2) {}
+
+ protected:
+  friend class ScopedProfile;
+};
+
+// Adds a profile event to `profiler` that begins with the construction
+// of the object and ends when the object goes out of scope.
+// The lifetime of tag should be at least the lifetime of `profiler`.
+// `profiler` may be null, in which case nothing is profiled.
+class ScopedProfile {
+ public:
+  ScopedProfile(Profiler* profiler, const char* tag,
+                Profiler::EventType event_type = Profiler::EventType::DEFAULT,
+                int64_t event_metadata = 0)
+      : profiler_(profiler), event_handle_(0) {
+    if (profiler) {
+      event_handle_ = profiler_->BeginEvent(tag, event_type, event_metadata);
+    }
+  }
+
+  ~ScopedProfile() {
+    if (profiler_) {
+      profiler_->EndEvent(event_handle_);
+    }
+  }
+
+ protected:
+  Profiler* profiler_;
+  uint32_t event_handle_;
+};
+
+class ScopedOperatorProfile : public ScopedProfile {
+ public:
+  ScopedOperatorProfile(Profiler* profiler, const char* tag, int node_index)
+      : ScopedProfile(profiler, tag, Profiler::EventType::OPERATOR_INVOKE_EVENT,
+                      static_cast<uint32_t>(node_index)) {}
+};
+
+class ScopedDelegateOperatorProfile : public ScopedProfile {
+ public:
+  ScopedDelegateOperatorProfile(Profiler* profiler, const char* tag,
+                                int node_index)
+      : ScopedProfile(profiler, tag,
+                      Profiler::EventType::DELEGATE_OPERATOR_INVOKE_EVENT,
+                      static_cast<uint32_t>(node_index)) {}
+};
+
+class ScopedRuntimeInstrumentationProfile : public ScopedProfile {
+ public:
+  ScopedRuntimeInstrumentationProfile(Profiler* profiler, const char* tag)
+      : ScopedProfile(
+            profiler, tag,
+            Profiler::EventType::GENERAL_RUNTIME_INSTRUMENTATION_EVENT, -1) {}
+
+  void set_runtime_status(int64_t delegate_status, int64_t interpreter_status) {
+    if (profiler_) {
+      delegate_status_ = delegate_status;
+      interpreter_status_ = interpreter_status;
+    }
+  }
+
+  ~ScopedRuntimeInstrumentationProfile() {
+    if (profiler_) {
+      profiler_->EndEvent(event_handle_, delegate_status_, interpreter_status_);
+    }
+  }
+
+ private:
+  int64_t delegate_status_;
+  int64_t interpreter_status_;
+};
+
+}  // namespace tflite
+
+#define TFLITE_VARNAME_UNIQ_IMPL(name, ctr) name##ctr
+#define TFLITE_VARNAME_UNIQ(name, ctr) TFLITE_VARNAME_UNIQ_IMPL(name, ctr)
+
+#define TFLITE_SCOPED_TAGGED_DEFAULT_PROFILE(profiler, tag)          \
+  tflite::ScopedProfile TFLITE_VARNAME_UNIQ(_profile_, __COUNTER__)( \
+      (profiler), (tag))
+
+#define TFLITE_SCOPED_TAGGED_OPERATOR_PROFILE(profiler, tag, node_index)     \
+  tflite::ScopedOperatorProfile TFLITE_VARNAME_UNIQ(_profile_, __COUNTER__)( \
+      (profiler), (tag), (node_index))
+
+#define TFLITE_SCOPED_DELEGATE_OPERATOR_PROFILE(profiler, tag, node_index) \
+  tflite::ScopedDelegateOperatorProfile TFLITE_VARNAME_UNIQ(               \
+      _profile_, __COUNTER__)((profiler), (tag), (node_index))
+
+#define TFLITE_ADD_RUNTIME_INSTRUMENTATION_EVENT(                          \
+    profiler, tag, delegate_status, interpreter_status)                    \
+  do {                                                                     \
+    if (profiler) {                                                        \
+      const auto handle = profiler->BeginEvent(                            \
+          tag, Profiler::EventType::GENERAL_RUNTIME_INSTRUMENTATION_EVENT, \
+          delegate_status, interpreter_status);                            \
+      profiler->EndEvent(handle);                                          \
+    }                                                                      \
+  } while (false);
+
+#endif  // TENSORFLOW_LITE_CORE_API_PROFILER_H_
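
A minimal sketch of plugging a Profiler subclass into the scoped helpers and macros above; the stderr logging and handle counter are illustrative choices, not part of this commit:

    #include <cstdio>

    #include "tensorflow/lite/core/api/profiler.h"

    class StderrProfiler : public tflite::Profiler {
     public:
      uint32_t BeginEvent(const char* tag, EventType event_type,
                          int64_t event_metadata1,
                          int64_t event_metadata2) override {
        std::fprintf(stderr, "begin '%s'\n", tag);
        return next_handle_++;
      }
      void EndEvent(uint32_t event_handle) override {
        std::fprintf(stderr, "end %u\n", event_handle);
      }

     private:
      uint32_t next_handle_ = 1;
    };

    void RunNode(tflite::Profiler* profiler) {
      // The event spans this scope; a null profiler is silently ignored.
      TFLITE_SCOPED_TAGGED_DEFAULT_PROFILE(profiler, "RunNode");
      // ... node computation ...
    }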

+ 50 - 0
tensorflow/lite/micro/tensorflow/lite/core/api/tensor_utils.cc

@@ -0,0 +1,50 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/core/api/tensor_utils.h"
+
+#include <string.h>
+
+#include "tensorflow/lite/c/common.h"
+
+namespace tflite {
+
+TfLiteStatus ResetVariableTensor(TfLiteTensor* tensor) {
+  if (!tensor->is_variable) {
+    return kTfLiteOk;
+  }
+  // TODO(b/115961645): Implement - If a variable tensor has a buffer, reset it
+  // to the value of the buffer.
+  int value = 0;
+  if (tensor->type == kTfLiteInt8) {
+    value = tensor->params.zero_point;
+  }
+  // TODO(b/139446230): Provide a platform header to better handle these
+  // specific scenarios.
+#if __ANDROID__ || defined(__x86_64__) || defined(__i386__) || \
+    defined(__i386) || defined(__x86__) || defined(__X86__) || \
+    defined(_X86_) || defined(_M_IX86) || defined(_M_X64)
+  memset(tensor->data.raw, value, tensor->bytes);
+#else
+  char* raw_ptr = tensor->data.raw;
+  for (size_t i = 0; i < tensor->bytes; ++i) {
+    *raw_ptr = value;
+    raw_ptr++;
+  }
+#endif
+  return kTfLiteOk;
+}
+
+}  // namespace tflite

+ 28 - 0
tensorflow/lite/micro/tensorflow/lite/core/api/tensor_utils.h

@@ -0,0 +1,28 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_CORE_API_TENSOR_UTILS_H_
+#define TENSORFLOW_LITE_CORE_API_TENSOR_UTILS_H_
+
+#include "tensorflow/lite/c/common.h"
+
+namespace tflite {
+
+// Resets a variable tensor to the default value.
+TfLiteStatus ResetVariableTensor(TfLiteTensor* tensor);
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_CORE_API_TENSOR_UTILS_H_
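
A short usage sketch; the LSTM-state tensor name is illustrative (real tensors come from the interpreter's allocations):

    #include "tensorflow/lite/c/common.h"
    #include "tensorflow/lite/core/api/tensor_utils.h"

    void ClearRecurrentState(TfLiteTensor* lstm_state) {
      // Non-variable tensors are left untouched (kTfLiteOk is returned);
      // int8 variable tensors are filled with their zero point, all other
      // types with zero bytes.
      tflite::ResetVariableTensor(lstm_state);
    }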

+ 16 - 0
tensorflow/lite/micro/tensorflow/lite/experimental/SConscript

@@ -0,0 +1,16 @@
+# RT-Thread building script for bridge
+
+import os
+from building import *
+
+cwd = GetCurrentDir()
+objs = []
+list = os.listdir(cwd)
+
+if GetDepend('PKG_USING_TENSORFLOWLITEMICRO'):
+    for d in list:
+        path = os.path.join(cwd, d)
+        if os.path.isfile(os.path.join(path, 'SConscript')):
+            objs = objs + SConscript(os.path.join(d, 'SConscript'))
+
+Return('objs')

+ 16 - 0
tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/SConscript

@@ -0,0 +1,16 @@
+# RT-Thread building script for bridge
+
+import os
+from building import *
+
+cwd = GetCurrentDir()
+objs = []
+list = os.listdir(cwd)
+
+if GetDepend('PKG_USING_TENSORFLOWLITEMICRO'):
+    for d in list:
+        path = os.path.join(cwd, d)
+        if os.path.isfile(os.path.join(path, 'SConscript')):
+            objs = objs + SConscript(os.path.join(d, 'SConscript'))
+
+Return('objs')

+ 28 - 0
tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/SConscript

@@ -0,0 +1,28 @@
+from building import *
+import os
+
+cwd = GetCurrentDir()
+src = Glob('*.c') + Glob('*.cc')
+
+# package root
+root = str(Dir('#'))
+packages = os.path.join(root, 'packages')
+file_list = os.listdir(packages)
+for f in file_list:
+    if f.split('-')[0] == 'TensorflowLiteMicro':
+        tflm_pkg = os.path.join(packages, f)
+        break
+#./third_party/flatbuffers/include
+flatbuffer = os.path.join(tflm_pkg, "third_party/flatbuffers/include")
+#./third_party/gemmlowp
+gemmlowp = os.path.join(tflm_pkg, "third_party/gemmlowp")
+#./third_party/kissfft
+kissfft = os.path.join(tflm_pkg, "third_party/kissfft")
+#./third_party/ruy
+ruy = os.path.join(tflm_pkg, "third_party/ruy")
+
+CPPPATH = [tflm_pkg, flatbuffer, gemmlowp, kissfft, ruy]
+
+group = DefineGroup('lite', src, depend = ['PKG_USING_TENSORFLOWLITEMICRO'], CPPPATH = CPPPATH)
+
+Return('group')

+ 102 - 0
tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/bits.h

@@ -0,0 +1,102 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_BITS_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_BITS_H_
+
+#ifdef __cplusplus
+#include <cstdint>
+
+extern "C" {
+#endif
+
+static inline int CountLeadingZeros32Slow(uint64_t n) {
+  int zeroes = 28;
+  if (n >> 16) zeroes -= 16, n >>= 16;
+  if (n >> 8) zeroes -= 8, n >>= 8;
+  if (n >> 4) zeroes -= 4, n >>= 4;
+  return "\4\3\2\2\1\1\1\1\0\0\0\0\0\0\0"[n] + zeroes;
+}
+
+static inline int CountLeadingZeros32(uint32_t n) {
+#if defined(_MSC_VER)
+  unsigned long result = 0;  // NOLINT(runtime/int)
+  if (_BitScanReverse(&result, n)) {
+    return 31 - result;
+  }
+  return 32;
+#elif defined(__GNUC__)
+
+  // Handle 0 as a special case because __builtin_clz(0) is undefined.
+  if (n == 0) {
+    return 32;
+  }
+  return __builtin_clz(n);
+#else
+  return CountLeadingZeros32Slow(n);
+#endif
+}
+
+static inline int MostSignificantBit32(uint32_t n) {
+  return 32 - CountLeadingZeros32(n);
+}
+
+static inline int CountLeadingZeros64Slow(uint64_t n) {
+  int zeroes = 60;
+  if (n >> 32) zeroes -= 32, n >>= 32;
+  if (n >> 16) zeroes -= 16, n >>= 16;
+  if (n >> 8) zeroes -= 8, n >>= 8;
+  if (n >> 4) zeroes -= 4, n >>= 4;
+  return "\4\3\2\2\1\1\1\1\0\0\0\0\0\0\0"[n] + zeroes;
+}
+
+static inline int CountLeadingZeros64(uint64_t n) {
+#if defined(_MSC_VER) && defined(_M_X64)
+  // MSVC does not have __builtin_clzll. Use _BitScanReverse64.
+  unsigned long result = 0;  // NOLINT(runtime/int)
+  if (_BitScanReverse64(&result, n)) {
+    return 63 - result;
+  }
+  return 64;
+#elif defined(_MSC_VER)
+  // MSVC does not have __builtin_clzll. Compose two calls to _BitScanReverse
+  unsigned long result = 0;  // NOLINT(runtime/int)
+  if ((n >> 32) && _BitScanReverse(&result, n >> 32)) {
+    return 31 - result;
+  }
+  if (_BitScanReverse(&result, n)) {
+    return 63 - result;
+  }
+  return 64;
+#elif defined(__GNUC__)
+
+  // Handle 0 as a special case because __builtin_clzll(0) is undefined.
+  if (n == 0) {
+    return 64;
+  }
+  return __builtin_clzll(n);
+#else
+  return CountLeadingZeros64Slow(n);
+#endif
+}
+
+static inline int MostSignificantBit64(uint64_t n) {
+  return 64 - CountLeadingZeros64(n);
+}
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_BITS_H_
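
A few worked values for the helpers above (MostSignificantBit32(n) is the 1-based index of the highest set bit, i.e. floor(log2(n)) + 1, with 0 mapped to 0):

    #include <assert.h>

    #include "tensorflow/lite/experimental/microfrontend/lib/bits.h"

    int main() {
      assert(MostSignificantBit32(1) == 1);            // 0b1
      assert(MostSignificantBit32(8) == 4);            // 0b1000
      assert(MostSignificantBit32(0) == 0);            // clz(0) == 32 here
      assert(MostSignificantBit64(1ULL << 40) == 41);  // high word in use
      return 0;
    }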

+ 54 - 0
tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/fft.cc

@@ -0,0 +1,54 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/experimental/microfrontend/lib/fft.h"
+
+#include <string.h>
+
+#define FIXED_POINT 16
+#include "kiss_fft.h"
+#include "tools/kiss_fftr.h"
+
+void FftCompute(struct FftState* state, const int16_t* input,
+                int input_scale_shift) {
+  const size_t input_size = state->input_size;
+  const size_t fft_size = state->fft_size;
+
+  int16_t* fft_input = state->input;
+  // First, scale the input by the given shift.
+  size_t i;
+  for (i = 0; i < input_size; ++i) {
+    fft_input[i] = static_cast<int16_t>(static_cast<uint16_t>(input[i])
+                                        << input_scale_shift);
+  }
+  // Zero out whatever else remains in the top part of the input.
+  for (; i < fft_size; ++i) {
+    fft_input[i] = 0;
+  }
+
+  // Apply the FFT.
+  kiss_fftr(
+      reinterpret_cast<const kiss_fftr_cfg>(state->scratch),
+      state->input,
+      reinterpret_cast<kiss_fft_cpx*>(state->output));
+}
+
+void FftInit(struct FftState* state) {
+  // All the initialization is done in FftPopulateState()
+}
+
+void FftReset(struct FftState* state) {
+  memset(state->input, 0, state->fft_size * sizeof(*state->input));
+  memset(state->output, 0, (state->fft_size / 2 + 1) * sizeof(*state->output));
+}

+ 50 - 0
tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/fft.h

@@ -0,0 +1,50 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_H_
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct complex_int16_t {
+  int16_t real;
+  int16_t imag;
+};
+
+struct FftState {
+  int16_t* input;
+  struct complex_int16_t* output;
+  size_t fft_size;
+  size_t input_size;
+  void* scratch;
+  size_t scratch_size;
+};
+
+void FftCompute(struct FftState* state, const int16_t* input,
+                int input_scale_shift);
+
+void FftInit(struct FftState* state);
+
+void FftReset(struct FftState* state);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_H_

+ 72 - 0
tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/fft_util.cc

@@ -0,0 +1,72 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/experimental/microfrontend/lib/fft_util.h"
+
+#include <stdio.h>
+
+#define FIXED_POINT 16
+#include "kiss_fft.h"
+#include "tools/kiss_fftr.h"
+
+int FftPopulateState(struct FftState* state, size_t input_size) {
+  state->input_size = input_size;
+  state->fft_size = 1;
+  while (state->fft_size < state->input_size) {
+    state->fft_size <<= 1;
+  }
+
+  state->input = reinterpret_cast<int16_t*>(
+      malloc(state->fft_size * sizeof(*state->input)));
+  if (state->input == nullptr) {
+    fprintf(stderr, "Failed to alloc fft input buffer\n");
+    return 0;
+  }
+
+  state->output = reinterpret_cast<complex_int16_t*>(
+      malloc((state->fft_size / 2 + 1) * sizeof(*state->output) * 2));
+  if (state->output == nullptr) {
+    fprintf(stderr, "Failed to alloc fft output buffer\n");
+    return 0;
+  }
+
+  // Ask kissfft how much memory it wants.
+  size_t scratch_size = 0;
+  kiss_fftr_cfg kfft_cfg = kiss_fftr_alloc(
+      state->fft_size, 0, nullptr, &scratch_size);
+  if (kfft_cfg != nullptr) {
+    fprintf(stderr, "Kiss memory sizing failed.\n");
+    return 0;
+  }
+  state->scratch = malloc(scratch_size);
+  if (state->scratch == nullptr) {
+    fprintf(stderr, "Failed to alloc fft scratch buffer\n");
+    return 0;
+  }
+  state->scratch_size = scratch_size;
+  // Let kissfft configure the scratch space we just allocated.
+  kfft_cfg = kiss_fftr_alloc(state->fft_size, 0, state->scratch,
+                             &scratch_size);
+  if (kfft_cfg != state->scratch) {
+    fprintf(stderr, "Kiss memory preallocation strategy failed.\n");
+    return 0;
+  }
+  return 1;
+}
+
+void FftFreeStateContents(struct FftState* state) {
+  free(state->input);
+  free(state->output);
+  free(state->scratch);
+}

+ 34 - 0
tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/fft_util.h

@@ -0,0 +1,34 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_UTIL_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_UTIL_H_
+
+#include "tensorflow/lite/experimental/microfrontend/lib/fft.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Prepares an FFT for the given input size.
+int FftPopulateState(struct FftState* state, size_t input_size);
+
+// Frees any allocated buffers.
+void FftFreeStateContents(struct FftState* state);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_UTIL_H_
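
A minimal lifecycle sketch for the FFT module; the 480-sample input (30 ms at 16 kHz) and zero scale shift are example values:

    #include "tensorflow/lite/experimental/microfrontend/lib/fft.h"
    #include "tensorflow/lite/experimental/microfrontend/lib/fft_util.h"

    int RunFftOnce(const int16_t* samples /* 480 samples */) {
      struct FftState fft;
      // Rounds 480 up to the next power of two (512) and allocates buffers.
      if (!FftPopulateState(&fft, 480)) {
        return 0;
      }
      FftInit(&fft);
      // A shift of 0 uses the input as-is; callers usually derive the shift
      // from the window's maximum absolute value (see frontend.c).
      FftCompute(&fft, samples, /*input_scale_shift=*/0);
      // fft.output now holds fft.fft_size / 2 + 1 complex_int16_t bins.
      FftFreeStateContents(&fft);
      return 1;
    }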

+ 134 - 0
tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/filterbank.c

@@ -0,0 +1,134 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/experimental/microfrontend/lib/filterbank.h"
+
+#include <string.h>
+
+#include "tensorflow/lite/experimental/microfrontend/lib/bits.h"
+
+void FilterbankConvertFftComplexToEnergy(struct FilterbankState* state,
+                                         struct complex_int16_t* fft_output,
+                                         int32_t* energy) {
+  const int end_index = state->end_index;
+  int i;
+  energy += state->start_index;
+  fft_output += state->start_index;
+  for (i = state->start_index; i < end_index; ++i) {
+    const int32_t real = fft_output->real;
+    const int32_t imag = fft_output->imag;
+    fft_output++;
+    const uint32_t mag_squared = (real * real) + (imag * imag);
+    *energy++ = mag_squared;
+  }
+}
+
+void FilterbankAccumulateChannels(struct FilterbankState* state,
+                                  const int32_t* energy) {
+  uint64_t* work = state->work;
+  uint64_t weight_accumulator = 0;
+  uint64_t unweight_accumulator = 0;
+
+  const int16_t* channel_frequency_starts = state->channel_frequency_starts;
+  const int16_t* channel_weight_starts = state->channel_weight_starts;
+  const int16_t* channel_widths = state->channel_widths;
+
+  int num_channels_plus_1 = state->num_channels + 1;
+  int i;
+  for (i = 0; i < num_channels_plus_1; ++i) {
+    const int32_t* magnitudes = energy + *channel_frequency_starts++;
+    const int16_t* weights = state->weights + *channel_weight_starts;
+    const int16_t* unweights = state->unweights + *channel_weight_starts++;
+    const int width = *channel_widths++;
+    int j;
+    for (j = 0; j < width; ++j) {
+      weight_accumulator += *weights++ * ((uint64_t)*magnitudes);
+      unweight_accumulator += *unweights++ * ((uint64_t)*magnitudes);
+      ++magnitudes;
+    }
+    *work++ = weight_accumulator;
+    weight_accumulator = unweight_accumulator;
+    unweight_accumulator = 0;
+  }
+}
+
+static uint16_t Sqrt32(uint32_t num) {
+  if (num == 0) {
+    return 0;
+  }
+  uint32_t res = 0;
+  int max_bit_number = 32 - MostSignificantBit32(num);
+  max_bit_number |= 1;
+  uint32_t bit = 1U << (31 - max_bit_number);
+  int iterations = (31 - max_bit_number) / 2 + 1;
+  while (iterations--) {
+    if (num >= res + bit) {
+      num -= res + bit;
+      res = (res >> 1U) + bit;
+    } else {
+      res >>= 1U;
+    }
+    bit >>= 2U;
+  }
+  // Do rounding - if we have the bits.
+  if (num > res && res != 0xFFFF) {
+    ++res;
+  }
+  return res;
+}
+
+static uint32_t Sqrt64(uint64_t num) {
+  // Take a shortcut and just use 32 bit operations if the upper word is all
+  // clear. This will cause a slight off-by-one issue for numbers close to
+  // 2^32, but it probably isn't going to matter (and gives us a big
+  // performance win).
+  if ((num >> 32) == 0) {
+    return Sqrt32((uint32_t)num);
+  }
+  uint64_t res = 0;
+  int max_bit_number = 64 - MostSignificantBit64(num);
+  max_bit_number |= 1;
+  uint64_t bit = 1ULL << (63 - max_bit_number);
+  int iterations = (63 - max_bit_number) / 2 + 1;
+  while (iterations--) {
+    if (num >= res + bit) {
+      num -= res + bit;
+      res = (res >> 1U) + bit;
+    } else {
+      res >>= 1U;
+    }
+    bit >>= 2U;
+  }
+  // Do rounding - if we have the bits.
+  if (num > res && res != 0xFFFFFFFFLL) {
+    ++res;
+  }
+  return res;
+}
+
+uint32_t* FilterbankSqrt(struct FilterbankState* state, int scale_down_shift) {
+  const int num_channels = state->num_channels;
+  const uint64_t* work = state->work + 1;
+  // Reuse the work buffer since we're fine clobbering it at this point to hold
+  // the output.
+  uint32_t* output = (uint32_t*)state->work;
+  int i;
+  for (i = 0; i < num_channels; ++i) {
+    *output++ = Sqrt64(*work++) >> scale_down_shift;
+  }
+  return (uint32_t*)state->work;
+}
+
+void FilterbankReset(struct FilterbankState* state) {
+  memset(state->work, 0, (state->num_channels + 1) * sizeof(*state->work));
+}

+ 63 - 0
tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/filterbank.h

@@ -0,0 +1,63 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_H_
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "tensorflow/lite/experimental/microfrontend/lib/fft.h"
+
+#define kFilterbankBits 12
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct FilterbankState {
+  int num_channels;
+  int start_index;
+  int end_index;
+  int16_t* channel_frequency_starts;
+  int16_t* channel_weight_starts;
+  int16_t* channel_widths;
+  int16_t* weights;
+  int16_t* unweights;
+  uint64_t* work;
+};
+
+// Converts the relevant complex values of an FFT output into energy (the
+// square magnitude).
+void FilterbankConvertFftComplexToEnergy(struct FilterbankState* state,
+                                         struct complex_int16_t* fft_output,
+                                         int32_t* energy);
+
+// Computes the mel-scale filterbank on the given energy array. Output is
+// cached internally; to fetch it, you need to call FilterbankSqrt.
+void FilterbankAccumulateChannels(struct FilterbankState* state,
+                                  const int32_t* energy);
+
+// Applies an integer square root to the 64 bit intermediate values of the
+// filterbank, and returns a pointer to them. Memory will be invalidated the
+// next time FilterbankAccumulateChannels is called.
+uint32_t* FilterbankSqrt(struct FilterbankState* state, int scale_down_shift);
+
+void FilterbankReset(struct FilterbankState* state);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_H_
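
The call order the filterbank expects, as a sketch; `fb` and `fft` are assumed to have been populated already (see filterbank_util.h and fft_util.h):

    #include "tensorflow/lite/experimental/microfrontend/lib/fft.h"
    #include "tensorflow/lite/experimental/microfrontend/lib/filterbank.h"

    uint32_t* ComputeFilterbank(struct FilterbankState* fb,
                                struct FftState* fft) {
      // The squared magnitudes are written in place over the FFT output.
      int32_t* energy = reinterpret_cast<int32_t*>(fft->output);
      FilterbankConvertFftComplexToEnergy(fb, fft->output, energy);
      // Accumulates 64-bit sums into fb->work; results stay internal until
      // the sqrt pass below.
      FilterbankAccumulateChannels(fb, energy);
      // Returns fb->num_channels values; the buffer is invalidated by the
      // next FilterbankAccumulateChannels() call.
      return FilterbankSqrt(fb, /*scale_down_shift=*/0);
    }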

+ 220 - 0
tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/filterbank_util.c

@@ -0,0 +1,220 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/experimental/microfrontend/lib/filterbank_util.h"
+
+#include <assert.h>
+#include <math.h>
+#include <stdio.h>
+
+#define kFilterbankIndexAlignment 4
+#define kFilterbankChannelBlockSize 4
+
+void FilterbankFillConfigWithDefaults(struct FilterbankConfig* config) {
+  config->num_channels = 32;
+  config->lower_band_limit = 125.0f;
+  config->upper_band_limit = 7500.0f;
+  config->output_scale_shift = 7;
+}
+
+static float FreqToMel(float freq) { return 1127.0 * log1p(freq / 700.0); }
+
+static void CalculateCenterFrequencies(const int num_channels,
+                                       const float lower_frequency_limit,
+                                       const float upper_frequency_limit,
+                                       float* center_frequencies) {
+  assert(lower_frequency_limit >= 0.0f);
+  assert(upper_frequency_limit > lower_frequency_limit);
+
+  const float mel_low = FreqToMel(lower_frequency_limit);
+  const float mel_hi = FreqToMel(upper_frequency_limit);
+  const float mel_span = mel_hi - mel_low;
+  const float mel_spacing = mel_span / ((float)num_channels);
+  int i;
+  for (i = 0; i < num_channels; ++i) {
+    center_frequencies[i] = mel_low + (mel_spacing * (i + 1));
+  }
+}
+
+static void QuantizeFilterbankWeights(const float float_weight, int16_t* weight,
+                                      int16_t* unweight) {
+  *weight = floor(float_weight * (1 << kFilterbankBits) + 0.5);
+  *unweight = floor((1.0 - float_weight) * (1 << kFilterbankBits) + 0.5);
+}
+
+int FilterbankPopulateState(const struct FilterbankConfig* config,
+                            struct FilterbankState* state, int sample_rate,
+                            int spectrum_size) {
+  state->num_channels = config->num_channels;
+  const int num_channels_plus_1 = config->num_channels + 1;
+
+  // How should we align things to index counts given the byte alignment?
+  const int index_alignment =
+      (kFilterbankIndexAlignment < sizeof(int16_t)
+           ? 1
+           : kFilterbankIndexAlignment / sizeof(int16_t));
+
+  state->channel_frequency_starts =
+      malloc(num_channels_plus_1 * sizeof(*state->channel_frequency_starts));
+  state->channel_weight_starts =
+      malloc(num_channels_plus_1 * sizeof(*state->channel_weight_starts));
+  state->channel_widths =
+      malloc(num_channels_plus_1 * sizeof(*state->channel_widths));
+  state->work = malloc(num_channels_plus_1 * sizeof(*state->work));
+
+  float* center_mel_freqs =
+      malloc(num_channels_plus_1 * sizeof(*center_mel_freqs));
+  int16_t* actual_channel_starts =
+      malloc(num_channels_plus_1 * sizeof(*actual_channel_starts));
+  int16_t* actual_channel_widths =
+      malloc(num_channels_plus_1 * sizeof(*actual_channel_widths));
+
+  if (state->channel_frequency_starts == NULL ||
+      state->channel_weight_starts == NULL || state->channel_widths == NULL ||
+      center_mel_freqs == NULL || actual_channel_starts == NULL ||
+      actual_channel_widths == NULL) {
+    free(center_mel_freqs);
+    free(actual_channel_starts);
+    free(actual_channel_widths);
+    fprintf(stderr, "Failed to allocate channel buffers\n");
+    return 0;
+  }
+
+  CalculateCenterFrequencies(num_channels_plus_1, config->lower_band_limit,
+                             config->upper_band_limit, center_mel_freqs);
+
+  // Always exclude DC.
+  const float hz_per_sbin = 0.5 * sample_rate / ((float)spectrum_size - 1);
+  state->start_index = 1.5 + config->lower_band_limit / hz_per_sbin;
+  state->end_index = 0;  // Initialized to zero here, but actually set below.
+
+  // For each channel, we need to figure out what frequencies belong to it, and
+  // how much padding we need to add so that we can efficiently multiply the
+  // weights and unweights for accumulation. To simplify the multiplication
+  // logic, all channels will have some multiplication to do (even if there are
+  // no frequencies that accumulate to that channel) - they will be directed to
+  // a set of zero weights.
+  int chan_freq_index_start = state->start_index;
+  int weight_index_start = 0;
+  int needs_zeros = 0;
+
+  int chan;
+  for (chan = 0; chan < num_channels_plus_1; ++chan) {
+    // Keep jumping frequencies until we overshoot the bound on this channel.
+    int freq_index = chan_freq_index_start;
+    while (FreqToMel((freq_index)*hz_per_sbin) <= center_mel_freqs[chan]) {
+      ++freq_index;
+    }
+
+    const int width = freq_index - chan_freq_index_start;
+    actual_channel_starts[chan] = chan_freq_index_start;
+    actual_channel_widths[chan] = width;
+
+    if (width == 0) {
+      // This channel doesn't actually get anything from the frequencies, it's
+      // always zero. We need then to insert some 'zero' weights into the
+      // output, and just redirect this channel to do a single multiplication at
+      // this point. For simplicity, the zeros are placed at the beginning of
+      // the weights arrays, so we have to go and update all the other
+      // weight_starts to reflect this shift (but only once).
+      state->channel_frequency_starts[chan] = 0;
+      state->channel_weight_starts[chan] = 0;
+      state->channel_widths[chan] = kFilterbankChannelBlockSize;
+      if (!needs_zeros) {
+        needs_zeros = 1;
+        int j;
+        for (j = 0; j < chan; ++j) {
+          state->channel_weight_starts[j] += kFilterbankChannelBlockSize;
+        }
+        weight_index_start += kFilterbankChannelBlockSize;
+      }
+    } else {
+      // How far back do we need to go to ensure that we have the proper
+      // alignment?
+      const int aligned_start =
+          (chan_freq_index_start / index_alignment) * index_alignment;
+      const int aligned_width = (chan_freq_index_start - aligned_start + width);
+      const int padded_width =
+          (((aligned_width - 1) / kFilterbankChannelBlockSize) + 1) *
+          kFilterbankChannelBlockSize;
+
+      state->channel_frequency_starts[chan] = aligned_start;
+      state->channel_weight_starts[chan] = weight_index_start;
+      state->channel_widths[chan] = padded_width;
+      weight_index_start += padded_width;
+    }
+    chan_freq_index_start = freq_index;
+  }
+
+  // Allocate the two arrays to store the weights - weight_index_start contains
+  // the index of what would be the next set of weights that we would need to
+  // add, so that's how many weights we need to allocate.
+  state->weights = calloc(weight_index_start, sizeof(*state->weights));
+  state->unweights = calloc(weight_index_start, sizeof(*state->unweights));
+
+  // If the alloc failed, we also need to nuke the arrays.
+  if (state->weights == NULL || state->unweights == NULL) {
+    free(center_mel_freqs);
+    free(actual_channel_starts);
+    free(actual_channel_widths);
+    fprintf(stderr, "Failed to allocate weights or unweights\n");
+    return 0;
+  }
+
+  // Next pass, compute all the weights. Since everything has been memset to
+  // zero, we only need to fill in the weights that correspond to some frequency
+  // for a channel.
+  const float mel_low = FreqToMel(config->lower_band_limit);
+  for (chan = 0; chan < num_channels_plus_1; ++chan) {
+    int frequency = actual_channel_starts[chan];
+    const int num_frequencies = actual_channel_widths[chan];
+    const int frequency_offset =
+        frequency - state->channel_frequency_starts[chan];
+    const int weight_start = state->channel_weight_starts[chan];
+    const float denom_val = (chan == 0) ? mel_low : center_mel_freqs[chan - 1];
+
+    int j;
+    for (j = 0; j < num_frequencies; ++j, ++frequency) {
+      const float weight =
+          (center_mel_freqs[chan] - FreqToMel(frequency * hz_per_sbin)) /
+          (center_mel_freqs[chan] - denom_val);
+
+      // Make the float into an integer for the weights (and unweights).
+      const int weight_index = weight_start + frequency_offset + j;
+      QuantizeFilterbankWeights(weight, state->weights + weight_index,
+                                state->unweights + weight_index);
+    }
+    if (frequency > state->end_index) {
+      state->end_index = frequency;
+    }
+  }
+
+  free(center_mel_freqs);
+  free(actual_channel_starts);
+  free(actual_channel_widths);
+  if (state->end_index >= spectrum_size) {
+    fprintf(stderr, "Filterbank end_index is above spectrum size.\n");
+    return 0;
+  }
+  return 1;
+}
+
+void FilterbankFreeStateContents(struct FilterbankState* state) {
+  free(state->channel_frequency_starts);
+  free(state->channel_weight_starts);
+  free(state->channel_widths);
+  free(state->weights);
+  free(state->unweights);
+  free(state->work);
+}

+ 50 - 0
tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/filterbank_util.h

@@ -0,0 +1,50 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_UTIL_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_UTIL_H_
+
+#include "tensorflow/lite/experimental/microfrontend/lib/filterbank.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct FilterbankConfig {
+  // number of frequency channel buckets for filterbank
+  int num_channels;
+  // maximum frequency to include
+  float upper_band_limit;
+  // minimum frequency to include
+  float lower_band_limit;
+  // unused
+  int output_scale_shift;
+};
+
+// Fills the FilterbankConfig with "sane" defaults.
+void FilterbankFillConfigWithDefaults(struct FilterbankConfig* config);
+
+// Allocates any buffers.
+int FilterbankPopulateState(const struct FilterbankConfig* config,
+                            struct FilterbankState* state, int sample_rate,
+                            int spectrum_size);
+
+// Frees any allocated buffers.
+void FilterbankFreeStateContents(struct FilterbankState* state);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_UTIL_H_
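
A population sketch; the 16 kHz sample rate and 257-bin spectrum (a 512-point FFT gives 512 / 2 + 1 bins) are example values:

    #include "tensorflow/lite/experimental/microfrontend/lib/filterbank_util.h"

    int SetUpFilterbank(struct FilterbankState* state) {
      struct FilterbankConfig config;
      FilterbankFillConfigWithDefaults(&config);  // 32 channels, 125-7500 Hz
      return FilterbankPopulateState(&config, state, /*sample_rate=*/16000,
                                     /*spectrum_size=*/257);
    }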

+ 72 - 0
tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/frontend.c

@@ -0,0 +1,72 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/experimental/microfrontend/lib/frontend.h"
+
+#include "tensorflow/lite/experimental/microfrontend/lib/bits.h"
+
+struct FrontendOutput FrontendProcessSamples(struct FrontendState* state,
+                                             const int16_t* samples,
+                                             size_t num_samples,
+                                             size_t* num_samples_read) {
+  struct FrontendOutput output;
+  output.values = NULL;
+  output.size = 0;
+
+  // Try to apply the window - if it fails, return and wait for more data.
+  if (!WindowProcessSamples(&state->window, samples, num_samples,
+                            num_samples_read)) {
+    return output;
+  }
+
+  // Apply the FFT to the window's output (and scale it so that the fixed point
+  // FFT can have as much resolution as possible).
+  int input_shift =
+      15 - MostSignificantBit32(state->window.max_abs_output_value);
+  FftCompute(&state->fft, state->window.output, input_shift);
+
+  // We can re-use the FFT's output buffer to hold the energy.
+  int32_t* energy = (int32_t*)state->fft.output;
+
+  FilterbankConvertFftComplexToEnergy(&state->filterbank, state->fft.output,
+                                      energy);
+
+  FilterbankAccumulateChannels(&state->filterbank, energy);
+  uint32_t* scaled_filterbank = FilterbankSqrt(&state->filterbank, input_shift);
+
+  // Apply noise reduction.
+  NoiseReductionApply(&state->noise_reduction, scaled_filterbank);
+
+  if (state->pcan_gain_control.enable_pcan) {
+    PcanGainControlApply(&state->pcan_gain_control, scaled_filterbank);
+  }
+
+  // Apply the log and scale.
+  int correction_bits =
+      MostSignificantBit32(state->fft.fft_size) - 1 - (kFilterbankBits / 2);
+  uint16_t* logged_filterbank =
+      LogScaleApply(&state->log_scale, scaled_filterbank,
+                    state->filterbank.num_channels, correction_bits);
+
+  output.size = state->filterbank.num_channels;
+  output.values = logged_filterbank;
+  return output;
+}
+
+void FrontendReset(struct FrontendState* state) {
+  WindowReset(&state->window);
+  FftReset(&state->fft);
+  FilterbankReset(&state->filterbank);
+  NoiseReductionReset(&state->noise_reduction);
+}

+ 64 - 0
tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/frontend.h

@@ -0,0 +1,64 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_H_
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "tensorflow/lite/experimental/microfrontend/lib/fft.h"
+#include "tensorflow/lite/experimental/microfrontend/lib/filterbank.h"
+#include "tensorflow/lite/experimental/microfrontend/lib/log_scale.h"
+#include "tensorflow/lite/experimental/microfrontend/lib/noise_reduction.h"
+#include "tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.h"
+#include "tensorflow/lite/experimental/microfrontend/lib/window.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct FrontendState {
+  struct WindowState window;
+  struct FftState fft;
+  struct FilterbankState filterbank;
+  struct NoiseReductionState noise_reduction;
+  struct PcanGainControlState pcan_gain_control;
+  struct LogScaleState log_scale;
+};
+
+struct FrontendOutput {
+  const uint16_t* values;
+  size_t size;
+};
+
+// Main entry point to processing frontend samples. Updates num_samples_read to
+// contain the number of samples that have been consumed from the input array.
+// Returns a struct containing the generated output. If not enough samples were
+// added to generate a feature vector, the returned size will be 0 and the
+// values pointer will be NULL. Note that the output pointer will be invalidated
+// as soon as FrontendProcessSamples is called again, so copy the contents
+// elsewhere if you need to use them later.
+struct FrontendOutput FrontendProcessSamples(struct FrontendState* state,
+                                             const int16_t* samples,
+                                             size_t num_samples,
+                                             size_t* num_samples_read);
+
+void FrontendReset(struct FrontendState* state);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_H_
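
A sketch of the streaming contract described above; how the features are consumed is left open:

    #include "tensorflow/lite/experimental/microfrontend/lib/frontend.h"

    void StreamAudio(struct FrontendState* state, const int16_t* audio,
                     size_t num_samples) {
      while (num_samples > 0) {
        size_t num_read = 0;
        struct FrontendOutput output =
            FrontendProcessSamples(state, audio, num_samples, &num_read);
        audio += num_read;
        num_samples -= num_read;
        if (output.values != NULL) {
          // Copy or consume output.size uint16_t features here; the pointer
          // is invalidated by the next FrontendProcessSamples() call.
        }
      }
    }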

+ 85 - 0
tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/frontend_util.c

@@ -0,0 +1,85 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/experimental/microfrontend/lib/frontend_util.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#include "tensorflow/lite/experimental/microfrontend/lib/bits.h"
+
+void FrontendFillConfigWithDefaults(struct FrontendConfig* config) {
+  WindowFillConfigWithDefaults(&config->window);
+  FilterbankFillConfigWithDefaults(&config->filterbank);
+  NoiseReductionFillConfigWithDefaults(&config->noise_reduction);
+  PcanGainControlFillConfigWithDefaults(&config->pcan_gain_control);
+  LogScaleFillConfigWithDefaults(&config->log_scale);
+}
+
+int FrontendPopulateState(const struct FrontendConfig* config,
+                          struct FrontendState* state, int sample_rate) {
+  memset(state, 0, sizeof(*state));
+
+  if (!WindowPopulateState(&config->window, &state->window, sample_rate)) {
+    fprintf(stderr, "Failed to populate window state\n");
+    return 0;
+  }
+
+  if (!FftPopulateState(&state->fft, state->window.size)) {
+    fprintf(stderr, "Failed to populate fft state\n");
+    return 0;
+  }
+  FftInit(&state->fft);
+
+  if (!FilterbankPopulateState(&config->filterbank, &state->filterbank,
+                               sample_rate, state->fft.fft_size / 2 + 1)) {
+    fprintf(stderr, "Failed to populate filterbank state\n");
+    return 0;
+  }
+
+  if (!NoiseReductionPopulateState(&config->noise_reduction,
+                                   &state->noise_reduction,
+                                   state->filterbank.num_channels)) {
+    fprintf(stderr, "Failed to populate noise reduction state\n");
+    return 0;
+  }
+
+  int input_correction_bits =
+      MostSignificantBit32(state->fft.fft_size) - 1 - (kFilterbankBits / 2);
+  if (!PcanGainControlPopulateState(
+          &config->pcan_gain_control, &state->pcan_gain_control,
+          state->noise_reduction.estimate, state->filterbank.num_channels,
+          state->noise_reduction.smoothing_bits, input_correction_bits)) {
+    fprintf(stderr, "Failed to populate pcan gain control state\n");
+    return 0;
+  }
+
+  if (!LogScalePopulateState(&config->log_scale, &state->log_scale)) {
+    fprintf(stderr, "Failed to populate log scale state\n");
+    return 0;
+  }
+
+  FrontendReset(state);
+
+  // All good, return a true value.
+  return 1;
+}
+
+void FrontendFreeStateContents(struct FrontendState* state) {
+  WindowFreeStateContents(&state->window);
+  FftFreeStateContents(&state->fft);
+  FilterbankFreeStateContents(&state->filterbank);
+  NoiseReductionFreeStateContents(&state->noise_reduction);
+  PcanGainControlFreeStateContents(&state->pcan_gain_control);
+}

+ 52 - 0
tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/frontend_util.h

@@ -0,0 +1,52 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_UTIL_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_UTIL_H_
+
+#include "tensorflow/lite/experimental/microfrontend/lib/fft_util.h"
+#include "tensorflow/lite/experimental/microfrontend/lib/filterbank_util.h"
+#include "tensorflow/lite/experimental/microfrontend/lib/frontend.h"
+#include "tensorflow/lite/experimental/microfrontend/lib/log_scale_util.h"
+#include "tensorflow/lite/experimental/microfrontend/lib/noise_reduction_util.h"
+#include "tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_util.h"
+#include "tensorflow/lite/experimental/microfrontend/lib/window_util.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct FrontendConfig {
+  struct WindowConfig window;
+  struct FilterbankConfig filterbank;
+  struct NoiseReductionConfig noise_reduction;
+  struct PcanGainControlConfig pcan_gain_control;
+  struct LogScaleConfig log_scale;
+};
+
+// Fills the FrontendConfig with "sane" defaults.
+void FrontendFillConfigWithDefaults(struct FrontendConfig* config);
+
+// Allocates any buffers.
+int FrontendPopulateState(const struct FrontendConfig* config,
+                          struct FrontendState* state, int sample_rate);
+
+// Frees any allocated buffers.
+void FrontendFreeStateContents(struct FrontendState* state);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_UTIL_H_
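
The setup/teardown pairing for the whole frontend, as a sketch; the 16 kHz sample rate is an example value:

    #include "tensorflow/lite/experimental/microfrontend/lib/frontend_util.h"

    int UseFrontend(void) {
      struct FrontendConfig config;
      struct FrontendState state;
      FrontendFillConfigWithDefaults(&config);
      // Returns 0 (and logs to stderr) if any sub-module allocation fails.
      if (!FrontendPopulateState(&config, &state, /*sample_rate=*/16000)) {
        return 0;
      }
      // ... feed samples via FrontendProcessSamples() ...
      FrontendFreeStateContents(&state);
      return 1;
    }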

+ 30 - 0
tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/log_lut.c

@@ -0,0 +1,30 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/experimental/microfrontend/lib/log_lut.h"
+const uint16_t kLogLut[]
+#ifndef _MSC_VER
+    __attribute__((aligned(4)))
+#endif  // _MSC_VER
+    = {0,    224,  442,  654,  861,  1063, 1259, 1450, 1636, 1817, 1992, 2163,
+       2329, 2490, 2646, 2797, 2944, 3087, 3224, 3358, 3487, 3611, 3732, 3848,
+       3960, 4068, 4172, 4272, 4368, 4460, 4549, 4633, 4714, 4791, 4864, 4934,
+       5001, 5063, 5123, 5178, 5231, 5280, 5326, 5368, 5408, 5444, 5477, 5507,
+       5533, 5557, 5578, 5595, 5610, 5622, 5631, 5637, 5640, 5641, 5638, 5633,
+       5626, 5615, 5602, 5586, 5568, 5547, 5524, 5498, 5470, 5439, 5406, 5370,
+       5332, 5291, 5249, 5203, 5156, 5106, 5054, 5000, 4944, 4885, 4825, 4762,
+       4697, 4630, 4561, 4490, 4416, 4341, 4264, 4184, 4103, 4020, 3935, 3848,
+       3759, 3668, 3575, 3481, 3384, 3286, 3186, 3084, 2981, 2875, 2768, 2659,
+       2549, 2437, 2323, 2207, 2090, 1971, 1851, 1729, 1605, 1480, 1353, 1224,
+       1094, 963,  830,  695,  559,  421,  282,  142,  0,    0};

+ 40 - 0
tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/log_lut.h

@@ -0,0 +1,40 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_LUT_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_LUT_H_
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Number of segments in the log lookup table. The table will be kLogSegments+1
+// in length (with some padding).
+#define kLogSegments 128
+#define kLogSegmentsLog2 7
+
+// Scale used by lookup table.
+#define kLogScale 65536
+#define kLogScaleLog2 16
+#define kLogCoeff 45426
+
+extern const uint16_t kLogLut[];
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_LUT_H_

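The constants above encode the fixed-point formats used by log_scale.c: kLogScale is the Q16 scale, and kLogCoeff is ln(2) expressed in that scale, which is what lets a Q16 base-2 logarithm be converted to a natural logarithm with a single multiply. A quick standalone check of that relationship:

    #include <math.h>
    #include <stdio.h>

    int main(void) {
      // ln(2) * 2^16 = 0.693147... * 65536 = 45426.09..., which rounds to 45426.
      printf("%ld\n", lround(log(2.0) * 65536.0));  // Prints 45426 == kLogCoeff.
      return 0;
    }
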
+ 83 - 0
tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/log_scale.c

@@ -0,0 +1,83 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/experimental/microfrontend/lib/log_scale.h"
+
+#include "tensorflow/lite/experimental/microfrontend/lib/bits.h"
+#include "tensorflow/lite/experimental/microfrontend/lib/log_lut.h"
+
+#define kuint16max 0x0000FFFF
+
+// The following functions implement integer logarithms of various sizes. The
+// approximation is calculated according to the method described in
+//       www.inti.gob.ar/electronicaeinformatica/instrumentacion/utic/
+//       publicaciones/SPL2007/Log10-spl07.pdf
+// It first calculates log2 of the input and then converts it to a natural
+// logarithm.
+
+static uint32_t Log2FractionPart(const uint32_t x, const uint32_t log2x) {
+  // Part 1
+  int32_t frac = x - (1LL << log2x);
+  if (log2x < kLogScaleLog2) {
+    frac <<= kLogScaleLog2 - log2x;
+  } else {
+    frac >>= log2x - kLogScaleLog2;
+  }
+  // Part 2
+  const uint32_t base_seg = frac >> (kLogScaleLog2 - kLogSegmentsLog2);
+  const uint32_t seg_unit =
+      (((uint32_t)1) << kLogScaleLog2) >> kLogSegmentsLog2;
+
+  const int32_t c0 = kLogLut[base_seg];
+  const int32_t c1 = kLogLut[base_seg + 1];
+  const int32_t seg_base = seg_unit * base_seg;
+  const int32_t rel_pos = ((c1 - c0) * (frac - seg_base)) >> kLogScaleLog2;
+  return frac + c0 + rel_pos;
+}
+
+static uint32_t Log(const uint32_t x, const uint32_t scale_shift) {
+  const uint32_t integer = MostSignificantBit32(x) - 1;
+  const uint32_t fraction = Log2FractionPart(x, integer);
+  const uint32_t log2 = (integer << kLogScaleLog2) + fraction;
+  const uint32_t round = kLogScale / 2;
+  const uint32_t loge = (((uint64_t)kLogCoeff) * log2 + round) >> kLogScaleLog2;
+  // Finally scale to our output scale
+  const uint32_t loge_scaled = ((loge << scale_shift) + round) >> kLogScaleLog2;
+  return loge_scaled;
+}
+
+uint16_t* LogScaleApply(struct LogScaleState* state, uint32_t* signal,
+                        int signal_size, int correction_bits) {
+  const int scale_shift = state->scale_shift;
+  uint16_t* output = (uint16_t*)signal;
+  uint16_t* ret = output;
+  int i;
+  for (i = 0; i < signal_size; ++i) {
+    uint32_t value = *signal++;
+    if (state->enable_log) {
+      if (correction_bits < 0) {
+        value >>= -correction_bits;
+      } else {
+        value <<= correction_bits;
+      }
+      if (value > 1) {
+        value = Log(value, scale_shift);
+      } else {
+        value = 0;
+      }
+    }
+    *output++ = (value < kuint16max) ? value : kuint16max;
+  }
+  return ret;
+}

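Log() therefore returns roughly ln(x) * 2^scale_shift. A numeric trace for x = 65536, assuming the default scale_shift of 6 installed by log_scale_util.c below:

    // integer  = MostSignificantBit32(65536) - 1   = 16
    // fraction = Log2FractionPart(65536, 16)       = 0  (exact power of two)
    // log2     = (16 << 16) + 0                    = 1048576   (Q16 log2(x))
    // loge     = (45426 * 1048576 + 32768) >> 16   = 726816    (Q16 ln(x))
    // result   = ((726816 << 6) + 32768) >> 16     = 710  ~  ln(65536) * 64
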
+ 39 - 0
tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/log_scale.h

@@ -0,0 +1,39 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_H_
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct LogScaleState {
+  int enable_log;
+  int scale_shift;
+};
+
+// Applies a fixed-point logarithm to the signal and converts it to 16 bit.
+// Note that the signal array will be modified.
+uint16_t* LogScaleApply(struct LogScaleState* state, uint32_t* signal,
+                        int signal_size, int correction_bits);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_H_

+ 27 - 0
tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/log_scale_util.c

@@ -0,0 +1,27 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/experimental/microfrontend/lib/log_scale_util.h"
+
+void LogScaleFillConfigWithDefaults(struct LogScaleConfig* config) {
+  config->enable_log = 1;
+  config->scale_shift = 6;
+}
+
+int LogScalePopulateState(const struct LogScaleConfig* config,
+                          struct LogScaleState* state) {
+  state->enable_log = config->enable_log;
+  state->scale_shift = config->scale_shift;
+  return 1;
+}

+ 45 - 0
tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/log_scale_util.h

@@ -0,0 +1,45 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_UTIL_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_UTIL_H_
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "tensorflow/lite/experimental/microfrontend/lib/log_scale.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct LogScaleConfig {
+  // set to false (0) to disable this module
+  int enable_log;
+  // scale results by 2^(scale_shift)
+  int scale_shift;
+};
+
+// Populates the LogScaleConfig with "sane" default values.
+void LogScaleFillConfigWithDefaults(struct LogScaleConfig* config);
+
+// Allocates any buffers.
+int LogScalePopulateState(const struct LogScaleConfig* config,
+                          struct LogScaleState* state);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_UTIL_H_

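A short usage sketch for this module in isolation (the input values here are made up; LogScaleState owns no buffers, so there is no matching free function):

    #include "tensorflow/lite/experimental/microfrontend/lib/log_scale_util.h"

    void LogScaleExample(void) {
      struct LogScaleConfig config;
      struct LogScaleState state;
      LogScaleFillConfigWithDefaults(&config);  // enable_log = 1, scale_shift = 6.
      LogScalePopulateState(&config, &state);

      uint32_t signal[3] = {1, 100, 100000};
      // The buffer is rewritten in place as 16-bit values; correction_bits is 0
      // when no earlier stage applied extra scaling.
      uint16_t* out = LogScaleApply(&state, signal, 3, 0);
      // out[0] == 0 (inputs <= 1 map to 0); out[1] is about ln(100) * 64 = 295.
      (void)out;
    }
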
+ 51 - 0
tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/noise_reduction.c

@@ -0,0 +1,51 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/experimental/microfrontend/lib/noise_reduction.h"
+
+#include <string.h>
+
+void NoiseReductionApply(struct NoiseReductionState* state, uint32_t* signal) {
+  int i;
+  for (i = 0; i < state->num_channels; ++i) {
+    const uint32_t smoothing =
+        ((i & 1) == 0) ? state->even_smoothing : state->odd_smoothing;
+    const uint32_t one_minus_smoothing = (1 << kNoiseReductionBits) - smoothing;
+
+    // Update the estimate of the noise.
+    const uint32_t signal_scaled_up = signal[i] << state->smoothing_bits;
+    uint32_t estimate =
+        (((uint64_t)signal_scaled_up * smoothing) +
+         ((uint64_t)state->estimate[i] * one_minus_smoothing)) >>
+        kNoiseReductionBits;
+    state->estimate[i] = estimate;
+
+    // Make sure that we can't get a negative value for the signal - estimate.
+    if (estimate > signal_scaled_up) {
+      estimate = signal_scaled_up;
+    }
+
+    const uint32_t floor =
+        ((uint64_t)signal[i] * state->min_signal_remaining) >>
+        kNoiseReductionBits;
+    const uint32_t subtracted =
+        (signal_scaled_up - estimate) >> state->smoothing_bits;
+    const uint32_t output = subtracted > floor ? subtracted : floor;
+    signal[i] = output;
+  }
+}
+
+void NoiseReductionReset(struct NoiseReductionState* state) {
+  memset(state->estimate, 0, sizeof(*state->estimate) * state->num_channels);
+}

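The estimate update above is a per-channel one-pole low-pass filter in Q14 (kNoiseReductionBits = 14): estimate' = (signal_scaled * s + estimate * (2^14 - s)) >> 14. A numeric trace for one even channel, assuming the default even_smoothing of 0.025 set in noise_reduction_util.c below:

    // smoothing           = (uint16_t)(0.025 * 16384)       = 409   (Q14)
    // one_minus_smoothing = 16384 - 409                     = 15975
    // With signal[i] = 100, smoothing_bits = 10, estimate[i] = 0:
    //   signal_scaled_up = 100 << 10                        = 102400
    //   estimate'        = (102400 * 409 + 0 * 15975) >> 14 = 2556
    // i.e. the noise estimate moves about 2.5% of the way toward each new frame.
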
+ 46 - 0
tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/noise_reduction.h

@@ -0,0 +1,46 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_H_
+
+#define kNoiseReductionBits 14
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct NoiseReductionState {
+  int smoothing_bits;
+  uint16_t even_smoothing;
+  uint16_t odd_smoothing;
+  uint16_t min_signal_remaining;
+  int num_channels;
+  uint32_t* estimate;
+};
+
+// Removes stationary noise from each channel of the signal using a low-pass
+// filter.
+void NoiseReductionApply(struct NoiseReductionState* state, uint32_t* signal);
+
+void NoiseReductionReset(struct NoiseReductionState* state);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_H_

+ 45 - 0
tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/noise_reduction_util.c

@@ -0,0 +1,45 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/experimental/microfrontend/lib/noise_reduction_util.h"
+
+#include <stdio.h>
+
+void NoiseReductionFillConfigWithDefaults(struct NoiseReductionConfig* config) {
+  config->smoothing_bits = 10;
+  config->even_smoothing = 0.025;
+  config->odd_smoothing = 0.06;
+  config->min_signal_remaining = 0.05;
+}
+
+int NoiseReductionPopulateState(const struct NoiseReductionConfig* config,
+                                struct NoiseReductionState* state,
+                                int num_channels) {
+  state->smoothing_bits = config->smoothing_bits;
+  state->odd_smoothing = config->odd_smoothing * (1 << kNoiseReductionBits);
+  state->even_smoothing = config->even_smoothing * (1 << kNoiseReductionBits);
+  state->min_signal_remaining =
+      config->min_signal_remaining * (1 << kNoiseReductionBits);
+  state->num_channels = num_channels;
+  state->estimate = calloc(state->num_channels, sizeof(*state->estimate));
+  if (state->estimate == NULL) {
+    fprintf(stderr, "Failed to alloc estimate buffer\n");
+    return 0;
+  }
+  return 1;
+}
+
+void NoiseReductionFreeStateContents(struct NoiseReductionState* state) {
+  free(state->estimate);
+}

+ 50 - 0
tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/noise_reduction_util.h

@@ -0,0 +1,50 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_UTIL_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_UTIL_H_
+
+#include "tensorflow/lite/experimental/microfrontend/lib/noise_reduction.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct NoiseReductionConfig {
+  // scale the signal up by 2^(smoothing_bits) before reduction
+  int smoothing_bits;
+  // smoothing coefficient for even-numbered channels
+  float even_smoothing;
+  // smoothing coefficient for odd-numbered channels
+  float odd_smoothing;
+  // fraction of signal to preserve (1.0 disables this module)
+  float min_signal_remaining;
+};
+
+// Populates the NoiseReductionConfig with "sane" default values.
+void NoiseReductionFillConfigWithDefaults(struct NoiseReductionConfig* config);
+
+// Allocates any buffers.
+int NoiseReductionPopulateState(const struct NoiseReductionConfig* config,
+                                struct NoiseReductionState* state,
+                                int num_channels);
+
+// Frees any allocated buffers.
+void NoiseReductionFreeStateContents(struct NoiseReductionState* state);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_UTIL_H_

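A lifecycle sketch for the noise-reduction pair above; the 40-channel count is an assumption (in the full frontend, num_channels comes from the filterbank configuration):

    #include "tensorflow/lite/experimental/microfrontend/lib/noise_reduction_util.h"

    void NoiseReductionExample(void) {
      struct NoiseReductionConfig config;
      struct NoiseReductionState state;
      NoiseReductionFillConfigWithDefaults(&config);
      if (!NoiseReductionPopulateState(&config, &state, 40)) {
        return;  // Allocation of the estimate buffer failed.
      }
      uint32_t signal[40] = {0};  // One frame of filterbank energies.
      NoiseReductionApply(&state, signal);  // Subtracts the running estimate.
      NoiseReductionReset(&state);          // Clears the estimate between runs.
      NoiseReductionFreeStateContents(&state);
    }
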
+ 56 - 0
tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.c

@@ -0,0 +1,56 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.h"
+
+#include "tensorflow/lite/experimental/microfrontend/lib/bits.h"
+
+int16_t WideDynamicFunction(const uint32_t x, const int16_t* lut) {
+  if (x <= 2) {
+    return lut[x];
+  }
+
+  const int16_t interval = MostSignificantBit32(x);
+  lut += 4 * interval - 6;
+
+  const int16_t frac =
+      ((interval < 11) ? (x << (11 - interval)) : (x >> (interval - 11))) &
+      0x3FF;
+
+  int32_t result = ((int32_t)lut[2] * frac) >> 5;
+  result += (int32_t)((uint32_t)lut[1] << 5);
+  result *= frac;
+  result = (result + (1 << 14)) >> 15;
+  result += lut[0];
+  return (int16_t)result;
+}
+
+uint32_t PcanShrink(const uint32_t x) {
+  if (x < (2 << kPcanSnrBits)) {
+    return (x * x) >> (2 + 2 * kPcanSnrBits - kPcanOutputBits);
+  } else {
+    return (x >> (kPcanSnrBits - kPcanOutputBits)) - (1 << kPcanOutputBits);
+  }
+}
+
+void PcanGainControlApply(struct PcanGainControlState* state,
+                          uint32_t* signal) {
+  int i;
+  for (i = 0; i < state->num_channels; ++i) {
+    const uint32_t gain =
+        WideDynamicFunction(state->noise_estimate[i], state->gain_lut);
+    const uint32_t snr = ((uint64_t)signal[i] * gain) >> state->snr_shift;
+    signal[i] = PcanShrink(snr);
+  }
+}

+ 47 - 0
tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.h

@@ -0,0 +1,47 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_PCAN_GAIN_CONTROL_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_PCAN_GAIN_CONTROL_H_
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#define kPcanSnrBits 12
+#define kPcanOutputBits 6
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Details at https://research.google/pubs/pub45911.pdf
+struct PcanGainControlState {
+  int enable_pcan;
+  uint32_t* noise_estimate;
+  int num_channels;
+  int16_t* gain_lut;
+  int32_t snr_shift;
+};
+
+int16_t WideDynamicFunction(const uint32_t x, const int16_t* lut);
+
+uint32_t PcanShrink(const uint32_t x);
+
+void PcanGainControlApply(struct PcanGainControlState* state, uint32_t* signal);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_PCAN_GAIN_CONTROL_H_

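WideDynamicFunction() evaluates a piecewise-quadratic approximation over power-of-two intervals: lut[0..2] hold exact values for x <= 2, and each interval [2^(i-1), 2^i) stores a base value y0 and two coefficients a1, a2 at offsets 4*i - 6 through 4*i - 4 (hence kWideDynamicFunctionLUTSize = 4 * 32 - 3 = 125 entries in pcan_gain_control_util.h below). A trace of the indexing for x = 1000:

    // interval = MostSignificantBit32(1000)  = 10   (512 <= 1000 < 1024)
    // lut += 4 * 10 - 6 = 34                        (y0, a1, a2 live at 34..36)
    // frac = (1000 << 1) & 0x3FF             = 976  (Q10 position in the interval)
    // result = y0 + ((((a2 * 976) >> 5) + (a1 << 5)) * 976 + (1 << 14)) >> 15
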
+ 92 - 0
tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_util.c

@@ -0,0 +1,92 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_util.h"
+
+#include <math.h>
+#include <stdio.h>
+
+#define kint16max 0x00007FFF
+
+void PcanGainControlFillConfigWithDefaults(
+    struct PcanGainControlConfig* config) {
+  config->enable_pcan = 0;
+  config->strength = 0.95;
+  config->offset = 80.0;
+  config->gain_bits = 21;
+}
+
+int16_t PcanGainLookupFunction(const struct PcanGainControlConfig* config,
+                               int32_t input_bits, uint32_t x) {
+  const float x_as_float = ((float)x) / ((uint32_t)1 << input_bits);
+  const float gain_as_float =
+      ((uint32_t)1 << config->gain_bits) *
+      powf(x_as_float + config->offset, -config->strength);
+
+  if (gain_as_float > kint16max) {
+    return kint16max;
+  }
+  return (int16_t)(gain_as_float + 0.5f);
+}
+
+int PcanGainControlPopulateState(const struct PcanGainControlConfig* config,
+                                 struct PcanGainControlState* state,
+                                 uint32_t* noise_estimate,
+                                 const int num_channels,
+                                 const uint16_t smoothing_bits,
+                                 const int32_t input_correction_bits) {
+  state->enable_pcan = config->enable_pcan;
+  if (!state->enable_pcan) {
+    return 1;
+  }
+  state->noise_estimate = noise_estimate;
+  state->num_channels = num_channels;
+  state->gain_lut = malloc(kWideDynamicFunctionLUTSize * sizeof(int16_t));
+  if (state->gain_lut == NULL) {
+    fprintf(stderr, "Failed to allocate gain LUT\n");
+    return 0;
+  }
+  state->snr_shift = config->gain_bits - input_correction_bits - kPcanSnrBits;
+
+  const int32_t input_bits = smoothing_bits - input_correction_bits;
+  state->gain_lut[0] = PcanGainLookupFunction(config, input_bits, 0);
+  state->gain_lut[1] = PcanGainLookupFunction(config, input_bits, 1);
+  state->gain_lut -= 6;
+  int interval;
+  for (interval = 2; interval <= kWideDynamicFunctionBits; ++interval) {
+    const uint32_t x0 = (uint32_t)1 << (interval - 1);
+    const uint32_t x1 = x0 + (x0 >> 1);
+    const uint32_t x2 =
+        (interval == kWideDynamicFunctionBits) ? x0 + (x0 - 1) : 2 * x0;
+
+    const int16_t y0 = PcanGainLookupFunction(config, input_bits, x0);
+    const int16_t y1 = PcanGainLookupFunction(config, input_bits, x1);
+    const int16_t y2 = PcanGainLookupFunction(config, input_bits, x2);
+
+    const int32_t diff1 = (int32_t)y1 - y0;
+    const int32_t diff2 = (int32_t)y2 - y0;
+    const int32_t a1 = 4 * diff1 - diff2;
+    const int32_t a2 = diff2 - a1;
+
+    state->gain_lut[4 * interval] = y0;
+    state->gain_lut[4 * interval + 1] = (int16_t)a1;
+    state->gain_lut[4 * interval + 2] = (int16_t)a2;
+  }
+  state->gain_lut += 6;
+  return 1;
+}
+
+void PcanGainControlFreeStateContents(struct PcanGainControlState* state) {
+  free(state->gain_lut);
+}

+ 57 - 0
tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_util.h

@@ -0,0 +1,57 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_PCAN_GAIN_CONTROL_UTIL_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_PCAN_GAIN_CONTROL_UTIL_H_
+
+#include "tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.h"
+
+#define kWideDynamicFunctionBits 32
+#define kWideDynamicFunctionLUTSize (4 * kWideDynamicFunctionBits - 3)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct PcanGainControlConfig {
+  // set to false (0) to disable this module
+  int enable_pcan;
+  // gain normalization exponent (0.0 disables, 1.0 full strength)
+  float strength;
+  // positive value added in the normalization denominator
+  float offset;
+  // number of fractional bits in the gain
+  int gain_bits;
+};
+
+void PcanGainControlFillConfigWithDefaults(
+    struct PcanGainControlConfig* config);
+
+int16_t PcanGainLookupFunction(const struct PcanGainControlConfig* config,
+                               int32_t input_bits, uint32_t x);
+
+int PcanGainControlPopulateState(const struct PcanGainControlConfig* config,
+                                 struct PcanGainControlState* state,
+                                 uint32_t* noise_estimate,
+                                 const int num_channels,
+                                 const uint16_t smoothing_bits,
+                                 const int32_t input_correction_bits);
+
+void PcanGainControlFreeStateContents(struct PcanGainControlState* state);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_PCAN_GAIN_CONTROL_UTIL_H_

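PCAN deliberately does not own its noise estimate: PcanGainControlPopulateState() borrows the buffer allocated by the noise-reduction module, so both stages track the same noise floor. A wiring sketch (smoothing_bits = 10 mirrors the noise-reduction default; the 0 correction bits are an assumption):

    #include "tensorflow/lite/experimental/microfrontend/lib/noise_reduction_util.h"
    #include "tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_util.h"

    int WirePcan(const struct NoiseReductionState* nr,
                 struct PcanGainControlState* pcan) {
      struct PcanGainControlConfig config;
      PcanGainControlFillConfigWithDefaults(&config);
      config.enable_pcan = 1;  // The default config leaves the module disabled.
      return PcanGainControlPopulateState(&config, pcan, nr->estimate,
                                          nr->num_channels, 10, 0);
    }
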
+ 70 - 0
tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/window.c

@@ -0,0 +1,70 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/experimental/microfrontend/lib/window.h"
+
+#include <string.h>
+
+int WindowProcessSamples(struct WindowState* state, const int16_t* samples,
+                         size_t num_samples, size_t* num_samples_read) {
+  const int size = state->size;
+
+  // Copy samples from the samples buffer over to our local input.
+  size_t max_samples_to_copy = state->size - state->input_used;
+  if (max_samples_to_copy > num_samples) {
+    max_samples_to_copy = num_samples;
+  }
+  memcpy(state->input + state->input_used, samples,
+         max_samples_to_copy * sizeof(*samples));
+  *num_samples_read = max_samples_to_copy;
+  state->input_used += max_samples_to_copy;
+
+  if (state->input_used < state->size) {
+    // We don't have enough samples to compute a window.
+    return 0;
+  }
+
+  // Apply the window to the input.
+  const int16_t* coefficients = state->coefficients;
+  const int16_t* input = state->input;
+  int16_t* output = state->output;
+  int i;
+  int16_t max_abs_output_value = 0;
+  for (i = 0; i < size; ++i) {
+    int16_t new_value =
+        (((int32_t)*input++) * *coefficients++) >> kFrontendWindowBits;
+    *output++ = new_value;
+    if (new_value < 0) {
+      new_value = -new_value;
+    }
+    if (new_value > max_abs_output_value) {
+      max_abs_output_value = new_value;
+    }
+  }
+  // Shuffle the input down by the step size, and update how much we have used.
+  memmove(state->input, state->input + state->step,
+          sizeof(*state->input) * (state->size - state->step));
+  state->input_used -= state->step;
+  state->max_abs_output_value = max_abs_output_value;
+
+  // Indicate that the output buffer is valid for the next stage.
+  return 1;
+}
+
+void WindowReset(struct WindowState* state) {
+  memset(state->input, 0, state->size * sizeof(*state->input));
+  memset(state->output, 0, state->size * sizeof(*state->output));
+  state->input_used = 0;
+  state->max_abs_output_value = 0;
+}

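Because WindowProcessSamples() consumes at most one window's worth of input per call and reports the amount through num_samples_read, callers drain an arbitrary buffer in a loop. A minimal sketch:

    #include "tensorflow/lite/experimental/microfrontend/lib/window.h"

    void DrainBuffer(struct WindowState* state, const int16_t* samples,
                     size_t num_samples) {
      while (num_samples > 0) {
        size_t num_read = 0;
        if (WindowProcessSamples(state, samples, num_samples, &num_read)) {
          // state->output now holds one windowed frame of state->size values.
        }
        samples += num_read;
        num_samples -= num_read;
      }
    }
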
+ 49 - 0
tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/window.h

@@ -0,0 +1,49 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_H_
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#define kFrontendWindowBits 12
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct WindowState {
+  size_t size;
+  int16_t* coefficients;
+  size_t step;
+
+  int16_t* input;
+  size_t input_used;
+  int16_t* output;
+  int16_t max_abs_output_value;
+};
+
+// Applies a window to the samples coming in, stepping forward at the given
+// rate.
+int WindowProcessSamples(struct WindowState* state, const int16_t* samples,
+                         size_t num_samples, size_t* num_samples_read);
+
+void WindowReset(struct WindowState* state);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_H_

+ 73 - 0
tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/window_util.c

@@ -0,0 +1,73 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/experimental/microfrontend/lib/window_util.h"
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Some platforms don't have M_PI
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+
+void WindowFillConfigWithDefaults(struct WindowConfig* config) {
+  config->size_ms = 25;
+  config->step_size_ms = 10;
+}
+
+int WindowPopulateState(const struct WindowConfig* config,
+                        struct WindowState* state, int sample_rate) {
+  state->size = config->size_ms * sample_rate / 1000;
+  state->step = config->step_size_ms * sample_rate / 1000;
+
+  state->coefficients = malloc(state->size * sizeof(*state->coefficients));
+  if (state->coefficients == NULL) {
+    fprintf(stderr, "Failed to allocate window coefficients\n");
+    return 0;
+  }
+
+  // Populate the window values.
+  const float arg = M_PI * 2.0 / ((float)state->size);
+  int i;
+  for (i = 0; i < state->size; ++i) {
+    float float_value = 0.5 - (0.5 * cos(arg * (i + 0.5)));
+    // Scale it to fixed point and round it.
+    state->coefficients[i] =
+        floor(float_value * (1 << kFrontendWindowBits) + 0.5);
+  }
+
+  state->input_used = 0;
+  state->input = malloc(state->size * sizeof(*state->input));
+  if (state->input == NULL) {
+    fprintf(stderr, "Failed to allocate window input\n");
+    return 0;
+  }
+
+  state->output = malloc(state->size * sizeof(*state->output));
+  if (state->output == NULL) {
+    fprintf(stderr, "Failed to allocate window output\n");
+    return 0;
+  }
+
+  return 1;
+}
+
+void WindowFreeStateContents(struct WindowState* state) {
+  free(state->coefficients);
+  free(state->input);
+  free(state->output);
+}

+ 45 - 0
tensorflow/lite/micro/tensorflow/lite/experimental/microfrontend/lib/window_util.h

@@ -0,0 +1,45 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_UTIL_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_UTIL_H_
+
+#include "tensorflow/lite/experimental/microfrontend/lib/window.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct WindowConfig {
+  // length of window frame in milliseconds
+  size_t size_ms;
+  // length of step for next frame in milliseconds
+  size_t step_size_ms;
+};
+
+// Populates the WindowConfig with "sane" default values.
+void WindowFillConfigWithDefaults(struct WindowConfig* config);
+
+// Allocates any buffers.
+int WindowPopulateState(const struct WindowConfig* config,
+                        struct WindowState* state, int sample_rate);
+
+// Frees any allocated buffers.
+void WindowFreeStateContents(struct WindowState* state);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_UTIL_H_

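WindowPopulateState() converts the millisecond fields to sample counts as size_ms * sample_rate / 1000 and builds a Hann window in Q12 (kFrontendWindowBits). With the defaults above and a 16 kHz input (an assumed but typical rate for this frontend), the resulting sizes work out as follows:

    struct WindowConfig config;
    struct WindowState state;
    WindowFillConfigWithDefaults(&config);      // 25 ms window, 10 ms step.
    if (WindowPopulateState(&config, &state, 16000)) {
      // state.size == 25 * 16000 / 1000 == 400 samples per frame
      // state.step == 10 * 16000 / 1000 == 160 samples of advance
      WindowFreeStateContents(&state);
    }
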
+ 29 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/SConscript

@@ -0,0 +1,29 @@
+from building import *
+import os
+
+cwd = GetCurrentDir()
+src = Glob('*.c') + Glob('*.cc') + Glob('internal/*.cc')
+
+# Locate the root of the RT-Thread project tree.
+root = str(Dir('#'))
+packages = os.path.join(root, 'packages')
+file_list = os.listdir(packages)
+for f in file_list:
+    if f.split('-')[0] == 'TensorflowLiteMicro':
+        tflm_pkg = os.path.join(packages, f)
+        break
+#./third_party/flatbuffers/include
+flatbuffer = os.path.join(tflm_pkg, "third_party/flatbuffers/include")
+#./third_party/gemmlowp
+gemmlowp = os.path.join(tflm_pkg, "third_party/gemmlowp")
+#./third_party/kissfft
+kissfft = os.path.join(tflm_pkg, "third_party/kissfft")
+#./third_party/ruy
+ruy = os.path.join(tflm_pkg, "third_party/ruy")
+
+
+CPPPATH = [tflm_pkg, flatbuffer, gemmlowp, kissfft, ruy]
+
+group = DefineGroup('lite', src, depend = ['PKG_USING_TENSORFLOWLITEMICRO'], CPPPATH = CPPPATH)
+
+Return('group')

+ 956 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/common.h

@@ -0,0 +1,956 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_COMMON_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_COMMON_H_
+
+#ifndef ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK
+#ifdef GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK
+#define ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK
+#endif
+#endif
+
+#include <functional>
+
+#include "fixedpoint/fixedpoint.h"
+#include "tensorflow/lite/kernels/internal/cppmath.h"
+#include "tensorflow/lite/kernels/internal/optimized/neon_check.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+
+constexpr int kReverseShift = -1;
+
+inline void GetActivationMinMax(FusedActivationFunctionType ac,
+                                float* output_activation_min,
+                                float* output_activation_max) {
+  switch (ac) {
+    case FusedActivationFunctionType::kNone:
+      *output_activation_min = std::numeric_limits<float>::lowest();
+      *output_activation_max = std::numeric_limits<float>::max();
+      break;
+    case FusedActivationFunctionType::kRelu:
+      *output_activation_min = 0.f;
+      *output_activation_max = std::numeric_limits<float>::max();
+      break;
+    case FusedActivationFunctionType::kRelu1:
+      *output_activation_min = -1.f;
+      *output_activation_max = 1.f;
+      break;
+    case FusedActivationFunctionType::kRelu6:
+      *output_activation_min = 0.f;
+      *output_activation_max = 6.f;
+      break;
+  }
+}
+
+template <typename T>
+inline T ActivationFunctionWithMinMax(T x, T output_activation_min,
+                                      T output_activation_max) {
+  using std::max;
+  using std::min;
+  return min(max(x, output_activation_min), output_activation_max);
+}
+
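+// Example: for FusedActivationFunctionType::kRelu6, GetActivationMinMax()
+// above yields [0, 6], so ActivationFunctionWithMinMax(7.5f, 0.f, 6.f)
+// returns 6.f.
+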
+// Legacy function, left for compatibility only.
+template <FusedActivationFunctionType Ac>
+float ActivationFunction(float x) {
+  float output_activation_min, output_activation_max;
+  GetActivationMinMax(Ac, &output_activation_min, &output_activation_max);
+  return ActivationFunctionWithMinMax(x, output_activation_min,
+                                      output_activation_max);
+}
+
+inline void BiasAndClamp(float clamp_min, float clamp_max, int bias_size,
+                         const float* bias_data, int array_size,
+                         float* array_data) {
+  // Note: see b/132215220: in May 2019 we thought it would be OK to replace
+  // this with the Eigen one-liner:
+  //   return (array.colwise() + bias).cwiseMax(clamp_min).cwiseMin(clamp_max);
+  // This turned out to severely regress performance: +4ms (i.e. 8%) on
+  // MobileNet v2 / 1.0 / 224. So we keep custom NEON code for now.
+  TFLITE_DCHECK_EQ((array_size % bias_size), 0);
+#ifdef USE_NEON
+  float* array_ptr = array_data;
+  float* array_end_ptr = array_ptr + array_size;
+  const auto clamp_min_vec = vdupq_n_f32(clamp_min);
+  const auto clamp_max_vec = vdupq_n_f32(clamp_max);
+  for (; array_ptr != array_end_ptr; array_ptr += bias_size) {
+    int i = 0;
+    for (; i <= bias_size - 16; i += 16) {
+      auto b0 = vld1q_f32(bias_data + i);
+      auto b1 = vld1q_f32(bias_data + i + 4);
+      auto b2 = vld1q_f32(bias_data + i + 8);
+      auto b3 = vld1q_f32(bias_data + i + 12);
+      auto a0 = vld1q_f32(array_ptr + i);
+      auto a1 = vld1q_f32(array_ptr + i + 4);
+      auto a2 = vld1q_f32(array_ptr + i + 8);
+      auto a3 = vld1q_f32(array_ptr + i + 12);
+      auto x0 = vaddq_f32(a0, b0);
+      auto x1 = vaddq_f32(a1, b1);
+      auto x2 = vaddq_f32(a2, b2);
+      auto x3 = vaddq_f32(a3, b3);
+      x0 = vmaxq_f32(clamp_min_vec, x0);
+      x1 = vmaxq_f32(clamp_min_vec, x1);
+      x2 = vmaxq_f32(clamp_min_vec, x2);
+      x3 = vmaxq_f32(clamp_min_vec, x3);
+      x0 = vminq_f32(clamp_max_vec, x0);
+      x1 = vminq_f32(clamp_max_vec, x1);
+      x2 = vminq_f32(clamp_max_vec, x2);
+      x3 = vminq_f32(clamp_max_vec, x3);
+      vst1q_f32(array_ptr + i, x0);
+      vst1q_f32(array_ptr + i + 4, x1);
+      vst1q_f32(array_ptr + i + 8, x2);
+      vst1q_f32(array_ptr + i + 12, x3);
+    }
+    for (; i <= bias_size - 4; i += 4) {
+      auto b = vld1q_f32(bias_data + i);
+      auto a = vld1q_f32(array_ptr + i);
+      auto x = vaddq_f32(a, b);
+      x = vmaxq_f32(clamp_min_vec, x);
+      x = vminq_f32(clamp_max_vec, x);
+      vst1q_f32(array_ptr + i, x);
+    }
+    for (; i < bias_size; i++) {
+      array_ptr[i] = ActivationFunctionWithMinMax(array_ptr[i] + bias_data[i],
+                                                  clamp_min, clamp_max);
+    }
+  }
+#else  // not NEON
+  for (int array_offset = 0; array_offset < array_size;
+       array_offset += bias_size) {
+    for (int i = 0; i < bias_size; i++) {
+      array_data[array_offset + i] = ActivationFunctionWithMinMax(
+          array_data[array_offset + i] + bias_data[i], clamp_min, clamp_max);
+    }
+  }
+#endif
+}
+
+inline int32_t MultiplyByQuantizedMultiplierSmallerThanOneExp(
+    int32_t x, int32_t quantized_multiplier, int left_shift) {
+  using gemmlowp::RoundingDivideByPOT;
+  using gemmlowp::SaturatingRoundingDoublingHighMul;
+  return RoundingDivideByPOT(
+      SaturatingRoundingDoublingHighMul(x, quantized_multiplier), -left_shift);
+}
+
+inline int32_t MultiplyByQuantizedMultiplierGreaterThanOne(
+    int32_t x, int32_t quantized_multiplier, int left_shift) {
+  using gemmlowp::SaturatingRoundingDoublingHighMul;
+  return SaturatingRoundingDoublingHighMul(x * (1 << left_shift),
+                                           quantized_multiplier);
+}
+
+inline int32_t MultiplyByQuantizedMultiplier(int32_t x,
+                                             int32_t quantized_multiplier,
+                                             int shift) {
+  using gemmlowp::RoundingDivideByPOT;
+  using gemmlowp::SaturatingRoundingDoublingHighMul;
+  int left_shift = shift > 0 ? shift : 0;
+  int right_shift = shift > 0 ? 0 : -shift;
+  return RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(
+                                 x * (1 << left_shift), quantized_multiplier),
+                             right_shift);
+}
+
+inline int32_t MultiplyByQuantizedMultiplier(int64_t x,
+                                             int32_t quantized_multiplier,
+                                             int shift) {
+  // Inputs:
+  // - quantized_multiplier has fixed point at bit 31
+  // - shift is -31 to +7 (negative for right shift)
+  //
+  // Assumptions: The following input ranges are assumed
+  // - quantized_multiplier >= 0  (the usual range is (1<<30) to (1<<31)-1)
+  // - scaling is chosen so final scaled result fits in int32_t
+  // - input x is in the range -(1<<47) <= x < (1<<47)
+  assert(quantized_multiplier >= 0);
+  assert(shift >= -31 && shift < 8);
+
+  int32_t reduced_multiplier = (quantized_multiplier + (1 << 15)) >> 16;
+  int total_shift = 15 - shift;
+  x = (x * (int64_t)reduced_multiplier) + ((int64_t)1 << (total_shift - 1));
+  int32_t result = x >> total_shift;
+  return result;
+}
+
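+// A worked numeric example of the 64-bit path above (the values are
+// illustrative, not taken from any real kernel): with
+// quantized_multiplier = 1 << 30 (0.5 in Q31) and shift = -1,
+//   reduced_multiplier = ((1 << 30) + (1 << 15)) >> 16 = 16384  (0.5 in Q15)
+//   total_shift        = 15 - (-1)                     = 16
+// and for x = 1000:
+//   (1000 * 16384 + (1 << 15)) >> 16 = 250 == round(1000 * 0.5 * 2^-1).
+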
+template <typename T>
+int CountLeadingZeros(T integer_input) {
+  static_assert(std::is_unsigned<T>::value,
+                "Only unsigned integer types handled.");
+#if defined(__GNUC__)
+  return integer_input ? __builtin_clz(integer_input)
+                       : std::numeric_limits<T>::digits;
+#else
+  if (integer_input == 0) {
+    return std::numeric_limits<T>::digits;
+  }
+
+  const T one_in_leading_positive = static_cast<T>(1)
+                                    << (std::numeric_limits<T>::digits - 1);
+  int leading_zeros = 0;
+  while (integer_input < one_in_leading_positive) {
+    integer_input <<= 1;
+    ++leading_zeros;
+  }
+  return leading_zeros;
+#endif
+}
+
+template <typename T>
+inline int CountLeadingSignBits(T integer_input) {
+  static_assert(std::is_signed<T>::value, "Only signed integer types handled.");
+#if defined(__GNUC__) && !defined(__clang__)
+  return integer_input ? __builtin_clrsb(integer_input)
+                       : std::numeric_limits<T>::digits;
+#else
+  using U = typename std::make_unsigned<T>::type;
+  return integer_input >= 0
+             ? CountLeadingZeros(static_cast<U>(integer_input)) - 1
+         : integer_input != std::numeric_limits<T>::min()
+             ? CountLeadingZeros(2 * static_cast<U>(-integer_input) - 1)
+             : 0;
+#endif
+}
+
+// Use "count leading zeros" helper functions to do a fast Floor(log_2(x)).
+template <typename Integer>
+inline Integer FloorLog2(Integer n) {
+  static_assert(std::is_integral<Integer>::value, "");
+  static_assert(std::is_signed<Integer>::value, "");
+  static_assert(sizeof(Integer) == 4 || sizeof(Integer) == 8, "");
+  TFLITE_CHECK_GT(n, 0);
+  if (sizeof(Integer) == 4) {
+    return 30 - CountLeadingSignBits(n);
+  } else {
+    return 62 - CountLeadingSignBits(n);
+  }
+}
+
+// Generates an INT16 LUT for a function, e.g., the exp(x) and 1/(1+x) tables
+// used in softmax.
+inline void gen_lut(const std::function<double(double)>& func, double min,
+                    double max, int16_t* table, const int num) {
+  // The table must hold num elements; the last element is not a sample point
+  // and is used only for slope calculation.
+  double step = (max - min) / (num - 1);
+  double half_step = step / 2.0;
+  for (int i = 0; i < num - 1; i++) {
+    double sample_val = TfLiteRound(func(min + i * step) * 32768.0);
+    double midpoint_interp_val =
+        TfLiteRound((func(min + (i + 1) * step) * 32768.0 +
+                     TfLiteRound(func(min + i * step) * 32768.0)) /
+                    2.0);
+    double midpoint_val =
+        TfLiteRound(func(min + i * step + half_step) * 32768.0);
+    double midpoint_err = midpoint_interp_val - midpoint_val;
+    double bias = TfLiteRound(midpoint_err / 2.0);
+    table[i] = std::min(std::max(sample_val - bias, -32768.0), 32767.0);
+  }
+  table[num - 1] =
+      std::min(std::max(TfLiteRound(func(max) * 32768.0), -32768.0), 32767.0);
+}
+
+// Interpolating int16_t table lookup, e.g., for the exp() and 1/(1+x) tables
+// used in softmax.
+inline int16_t generic_int16_table_lookup(int16_t value, const int16_t* lut) {
+  // 512 base values; the final entry, lut[512], is used only to calculate the
+  // slope of the last segment.
+  uint16_t index = static_cast<uint16_t>(256 + (value >> 7));
+  assert(index < 512 && "LUT index out of range.");
+  int16_t offset = value & 0x7f;
+
+  // base and slope are Q0.15
+  int16_t base = lut[index];
+  int16_t slope = lut[index + 1] - lut[index];
+
+  // Q0.15 * Q0.7 = Q0.22
+  // Round and convert from Q0.22 to Q0.15
+  int32_t delta = (static_cast<int32_t>(slope) * offset + 64) >> 7;
+
+  // Q0.15 + Q0.15
+  return base + delta;
+}
+
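+// A sketch of how gen_lut() and generic_int16_table_lookup() combine (the
+// 513-entry size matches the 512-segment indexing assumed by the lookup; the
+// exp() range here is illustrative):
+//
+//   int16_t exp_lut[513];
+//   gen_lut([](double x) { return std::exp(x); }, -10.0, 0.0, exp_lut, 513);
+//   int16_t y = generic_int16_table_lookup(q15_input, exp_lut);
+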
+// Table of sigmoid(i/24) at 0.16 format - 256 elements.
+
+// We use a combined sigmoid and tanh look-up table, since
+// tanh(x) = 2*sigmoid(2*x) - 1.
+// Both functions are symmetric, so the LUT is only needed
+// for the absolute value of the input.
+static const uint16_t sigmoid_table_uint16[256] = {
+    32768, 33451, 34133, 34813, 35493, 36169, 36843, 37513, 38180, 38841, 39498,
+    40149, 40794, 41432, 42064, 42688, 43304, 43912, 44511, 45102, 45683, 46255,
+    46817, 47369, 47911, 48443, 48964, 49475, 49975, 50464, 50942, 51409, 51865,
+    52311, 52745, 53169, 53581, 53983, 54374, 54755, 55125, 55485, 55834, 56174,
+    56503, 56823, 57133, 57433, 57724, 58007, 58280, 58544, 58800, 59048, 59288,
+    59519, 59743, 59959, 60168, 60370, 60565, 60753, 60935, 61110, 61279, 61441,
+    61599, 61750, 61896, 62036, 62172, 62302, 62428, 62549, 62666, 62778, 62886,
+    62990, 63090, 63186, 63279, 63368, 63454, 63536, 63615, 63691, 63765, 63835,
+    63903, 63968, 64030, 64090, 64148, 64204, 64257, 64308, 64357, 64405, 64450,
+    64494, 64536, 64576, 64614, 64652, 64687, 64721, 64754, 64786, 64816, 64845,
+    64873, 64900, 64926, 64950, 64974, 64997, 65019, 65039, 65060, 65079, 65097,
+    65115, 65132, 65149, 65164, 65179, 65194, 65208, 65221, 65234, 65246, 65258,
+    65269, 65280, 65291, 65301, 65310, 65319, 65328, 65337, 65345, 65352, 65360,
+    65367, 65374, 65381, 65387, 65393, 65399, 65404, 65410, 65415, 65420, 65425,
+    65429, 65433, 65438, 65442, 65445, 65449, 65453, 65456, 65459, 65462, 65465,
+    65468, 65471, 65474, 65476, 65479, 65481, 65483, 65485, 65488, 65489, 65491,
+    65493, 65495, 65497, 65498, 65500, 65501, 65503, 65504, 65505, 65507, 65508,
+    65509, 65510, 65511, 65512, 65513, 65514, 65515, 65516, 65517, 65517, 65518,
+    65519, 65520, 65520, 65521, 65522, 65522, 65523, 65523, 65524, 65524, 65525,
+    65525, 65526, 65526, 65526, 65527, 65527, 65528, 65528, 65528, 65529, 65529,
+    65529, 65529, 65530, 65530, 65530, 65530, 65531, 65531, 65531, 65531, 65531,
+    65532, 65532, 65532, 65532, 65532, 65532, 65533, 65533, 65533, 65533, 65533,
+    65533, 65533, 65533, 65534, 65534, 65534, 65534, 65534, 65534, 65534, 65534,
+    65534, 65534, 65535};
+
+// TODO(b/77858996): Add these to gemmlowp.
+template <typename IntegerType>
+IntegerType SaturatingAddNonGemmlowp(IntegerType a, IntegerType b) {
+  static_assert(std::is_same<IntegerType, void>::value, "unimplemented");
+  return a;
+}
+
+template <>
+inline std::int32_t SaturatingAddNonGemmlowp(std::int32_t a, std::int32_t b) {
+  std::int64_t a64 = a;
+  std::int64_t b64 = b;
+  std::int64_t sum = a64 + b64;
+  return static_cast<std::int32_t>(std::min(
+      static_cast<std::int64_t>(std::numeric_limits<std::int32_t>::max()),
+      std::max(
+          static_cast<std::int64_t>(std::numeric_limits<std::int32_t>::min()),
+          sum)));
+}
+
+template <typename tRawType, int tIntegerBits>
+gemmlowp::FixedPoint<tRawType, tIntegerBits> SaturatingAddNonGemmlowp(
+    gemmlowp::FixedPoint<tRawType, tIntegerBits> a,
+    gemmlowp::FixedPoint<tRawType, tIntegerBits> b) {
+  return gemmlowp::FixedPoint<tRawType, tIntegerBits>::FromRaw(
+      SaturatingAddNonGemmlowp(a.raw(), b.raw()));
+}
+
+template <typename IntegerType>
+IntegerType SaturatingSub(IntegerType a, IntegerType b) {
+  static_assert(std::is_same<IntegerType, void>::value, "unimplemented");
+  return a;
+}
+
+template <>
+inline std::int16_t SaturatingSub(std::int16_t a, std::int16_t b) {
+  std::int32_t a32 = a;
+  std::int32_t b32 = b;
+  std::int32_t diff = a32 - b32;
+  return static_cast<std::int16_t>(
+      std::min(static_cast<int32_t>(32767),
+               std::max(static_cast<int32_t>(-32768), diff)));
+}
+
+template <>
+inline std::int32_t SaturatingSub(std::int32_t a, std::int32_t b) {
+  std::int64_t a64 = a;
+  std::int64_t b64 = b;
+  std::int64_t diff = a64 - b64;
+  return static_cast<std::int32_t>(std::min(
+      static_cast<std::int64_t>(std::numeric_limits<std::int32_t>::max()),
+      std::max(
+          static_cast<std::int64_t>(std::numeric_limits<std::int32_t>::min()),
+          diff)));
+}
+
+template <typename tRawType, int tIntegerBits>
+gemmlowp::FixedPoint<tRawType, tIntegerBits> SaturatingSub(
+    gemmlowp::FixedPoint<tRawType, tIntegerBits> a,
+    gemmlowp::FixedPoint<tRawType, tIntegerBits> b) {
+  return gemmlowp::FixedPoint<tRawType, tIntegerBits>::FromRaw(
+      SaturatingSub(a.raw(), b.raw()));
+}
+// End section to be moved to gemmlowp.
+
+template <typename IntegerType>
+IntegerType SaturatingRoundingMultiplyByPOTParam(IntegerType x, int exponent) {
+  if (exponent == 0) {
+    return x;
+  }
+  using ScalarIntegerType =
+      typename gemmlowp::FixedPointRawTypeTraits<IntegerType>::ScalarRawType;
+  const IntegerType min =
+      gemmlowp::Dup<IntegerType>(std::numeric_limits<ScalarIntegerType>::min());
+  const IntegerType max =
+      gemmlowp::Dup<IntegerType>(std::numeric_limits<ScalarIntegerType>::max());
+  const int ScalarIntegerTypeBits = 8 * sizeof(ScalarIntegerType);
+
+  const std::int32_t threshold =
+      ((1 << (ScalarIntegerTypeBits - 1 - exponent)) - 1);
+  const IntegerType positive_mask =
+      gemmlowp::MaskIfGreaterThan(x, gemmlowp::Dup<IntegerType>(threshold));
+  const IntegerType negative_mask =
+      gemmlowp::MaskIfLessThan(x, gemmlowp::Dup<IntegerType>(-threshold));
+
+  IntegerType result = gemmlowp::ShiftLeft(x, exponent);
+  result = gemmlowp::SelectUsingMask(positive_mask, max, result);
+  result = gemmlowp::SelectUsingMask(negative_mask, min, result);
+  return result;
+}
+
+// If we want to leave IntegerBits fixed, then multiplication
+// by a power of two has to be saturating/rounding, not exact anymore.
+template <typename tRawType, int tIntegerBits>
+gemmlowp::FixedPoint<tRawType, tIntegerBits>
+SaturatingRoundingMultiplyByPOTParam(
+    gemmlowp::FixedPoint<tRawType, tIntegerBits> a, int exponent) {
+  return gemmlowp::FixedPoint<tRawType, tIntegerBits>::FromRaw(
+      SaturatingRoundingMultiplyByPOTParam(a.raw(), exponent));
+}
+
+// Convert int32_t multiplier to int16_t with rounding.
+inline void DownScaleInt32ToInt16Multiplier(int32_t multiplier_int32_t,
+                                            int16_t* multiplier_int16_t) {
+  TFLITE_DCHECK_GE(multiplier_int32_t, 0);
+  static constexpr int32_t kRoundingOffset = 1 << 15;
+  if (multiplier_int32_t >=
+      std::numeric_limits<int32_t>::max() - kRoundingOffset) {
+    *multiplier_int16_t = std::numeric_limits<int16_t>::max();
+    return;
+  }
+  const int32_t result = (multiplier_int32_t + kRoundingOffset) >> 16;
+  TFLITE_DCHECK_LE(result << 16, multiplier_int32_t + kRoundingOffset);
+  TFLITE_DCHECK_GT(result << 16, multiplier_int32_t - kRoundingOffset);
+  *multiplier_int16_t = result;
+  TFLITE_DCHECK_EQ(*multiplier_int16_t, result);
+}
+
+// Minimum output bits to accommodate log of maximum input range.  It actually
+// does not matter if one considers, say, [-64,64] or [-64,64).
+//
+// For example, run this through Octave:
+// [0:127; ...
+//  ceil(log(abs( log(2.^(0:127))+1 ))/log(2)); ...
+//  ceil(log(abs( log(2.^(0:127))+1 ))/log(2))]
+constexpr int min_log_x_output_bits(int input_bits) {
+  return input_bits > 90   ? 7
+         : input_bits > 44 ? 6
+         : input_bits > 21 ? 5
+         : input_bits > 10 ? 4
+         : input_bits > 4  ? 3
+         : input_bits > 1  ? 2
+                           : 1;
+}
+
+// Although currently the name of this function says that it cannot handle
+// values less than 1, in practice it can handle as low as 1/x_max, where
+// x_max is the largest representable input.  In other words, the output range
+// is symmetric.
+template <int OutputIntegerBits, int InputIntegerBits>
+inline gemmlowp::FixedPoint<int32_t, OutputIntegerBits>
+log_x_for_x_greater_than_or_equal_to_1_impl(
+    gemmlowp::FixedPoint<int32_t, InputIntegerBits> input_val) {
+  // assert(__builtin_clz(0u) >= std::numeric_limits<uint32_t>::digits - 1);
+  // assert(__builtin_clz(0u) <= std::numeric_limits<uint32_t>::digits);
+  using FixedPoint0 = gemmlowp::FixedPoint<int32_t, 0>;
+  // The reason for accumulating the result with an extra bit of headroom is
+  // that z_pow_2_adj * log_2 might be saturated, and adding num_scaled *
+  // recip_denom will otherwise introduce an error.
+  static constexpr int kAccumIntegerBits = OutputIntegerBits + 1;
+  using FixedPointAccum = gemmlowp::FixedPoint<int32_t, kAccumIntegerBits>;
+
+  const FixedPoint0 log_2 = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
+      FixedPoint0, 1488522236, std::log(2.0));
+  const FixedPoint0 sqrt_sqrt_half = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
+      FixedPoint0, 1805811301, std::sqrt(std::sqrt(0.5)));
+  const FixedPoint0 sqrt_half = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
+      FixedPoint0, 1518500250, std::sqrt(0.5));
+  const FixedPoint0 one_quarter =
+      GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(FixedPoint0, 536870912, 1.0 / 4.0);
+
+  const FixedPoint0 alpha_n = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
+      FixedPoint0, 117049297, 11.0 / 240.0 * std::sqrt(std::sqrt(2.0)));
+  const FixedPoint0 alpha_d = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
+      FixedPoint0, 127690142, 1.0 / 20.0 * std::sqrt(std::sqrt(2.0)));
+  const FixedPoint0 alpha_i = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
+      FixedPoint0, 1057819769,
+      2.0 / std::sqrt(std::sqrt(2.0)) - std::sqrt(std::sqrt(2.0)));
+  const FixedPoint0 alpha_f = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
+      FixedPoint0, 638450708, 1.0 / 4.0 * std::sqrt(std::sqrt(2.0)));
+
+  const FixedPointAccum shifted_quarter =
+      gemmlowp::Rescale<kAccumIntegerBits>(one_quarter);
+
+  // Reinterpret the input value as Q0.31, because we will figure out the
+  // required shift "ourselves" instead of using, say, Rescale.
+  FixedPoint0 z_a = FixedPoint0::FromRaw(input_val.raw());
+  // z_a_pow_2 = input_integer_bits - z_a_headroom;
+  int z_a_headroom_plus_1 = CountLeadingZeros(static_cast<uint32_t>(z_a.raw()));
+  FixedPoint0 r_a_tmp =
+      SaturatingRoundingMultiplyByPOTParam(z_a, (z_a_headroom_plus_1 - 1));
+  const int32_t r_a_raw =
+      SaturatingRoundingMultiplyByPOTParam((r_a_tmp * sqrt_half).raw(), 1);
+  // z_pow_2_adj = max(z_pow_2_a - 0.75, z_pow_2_b - 0.25);
+  // z_pow_2_adj = max(InputIntegerBits - z_a_headroom_plus_1 + 0.25,
+  //                   InputIntegerBits - z_b_headroom - 0.25);
+  const FixedPointAccum z_a_pow_2_adj = SaturatingAddNonGemmlowp(
+      FixedPointAccum::FromRaw(SaturatingRoundingMultiplyByPOTParam(
+          InputIntegerBits - z_a_headroom_plus_1, 31 - kAccumIntegerBits)),
+      shifted_quarter);
+
+  // z_b is treated like z_a, but premultiplying by sqrt(0.5).
+  FixedPoint0 z_b = z_a * sqrt_half;
+  int z_b_headroom = CountLeadingZeros(static_cast<uint32_t>(z_b.raw())) - 1;
+  const int32_t r_b_raw =
+      SaturatingRoundingMultiplyByPOTParam(z_a.raw(), z_b_headroom);
+  const FixedPointAccum z_b_pow_2_adj = SaturatingSub(
+      FixedPointAccum::FromRaw(SaturatingRoundingMultiplyByPOTParam(
+          InputIntegerBits - z_b_headroom, 31 - kAccumIntegerBits)),
+      shifted_quarter);
+
+  const FixedPoint0 r = FixedPoint0::FromRaw(std::min(r_a_raw, r_b_raw));
+  const FixedPointAccum z_pow_2_adj = FixedPointAccum::FromRaw(
+      std::max(z_a_pow_2_adj.raw(), z_b_pow_2_adj.raw()));
+
+  const FixedPoint0 p = gemmlowp::RoundingHalfSum(r, sqrt_sqrt_half);
+  FixedPoint0 q = r - sqrt_sqrt_half;
+  q = q + q;
+
+  const FixedPoint0 common_sq = q * q;
+  const FixedPoint0 num = q * r + q * common_sq * alpha_n;
+  const FixedPoint0 denom_minus_one_0 =
+      p * (alpha_i + q + alpha_d * common_sq) + alpha_f * q;
+  const FixedPoint0 recip_denom =
+      one_over_one_plus_x_for_x_in_0_1(denom_minus_one_0);
+
+  const FixedPointAccum num_scaled = gemmlowp::Rescale<kAccumIntegerBits>(num);
+  return gemmlowp::Rescale<OutputIntegerBits>(z_pow_2_adj * log_2 +
+                                              num_scaled * recip_denom);
+}
+
+template <int OutputIntegerBits, int InputIntegerBits>
+inline gemmlowp::FixedPoint<int32_t, OutputIntegerBits>
+log_x_for_x_greater_than_or_equal_to_1(
+    gemmlowp::FixedPoint<int32_t, InputIntegerBits> input_val) {
+  static_assert(
+      OutputIntegerBits >= min_log_x_output_bits(InputIntegerBits),
+      "Output integer bits must be sufficient to accommodate logs of inputs.");
+  return log_x_for_x_greater_than_or_equal_to_1_impl<OutputIntegerBits,
+                                                     InputIntegerBits>(
+      input_val);
+}
+
+inline int32_t GetReciprocal(int32_t x, int x_integer_digits,
+                             int* num_bits_over_unit) {
+  int headroom_plus_one = CountLeadingZeros(static_cast<uint32_t>(x));
+  // This is the number of bits to the left of the binary point above 1.0.
+  // Consider x=1.25.  In that case shifted_scale=0.8 and
+  // no later adjustment will be needed.
+  *num_bits_over_unit = x_integer_digits - headroom_plus_one;
+  const int32_t shifted_sum_minus_one =
+      static_cast<int32_t>((static_cast<uint32_t>(x) << headroom_plus_one) -
+                           (static_cast<uint32_t>(1) << 31));
+
+  gemmlowp::FixedPoint<int32_t, 0> shifted_scale =
+      gemmlowp::one_over_one_plus_x_for_x_in_0_1(
+          gemmlowp::FixedPoint<int32_t, 0>::FromRaw(shifted_sum_minus_one));
+  return shifted_scale.raw();
+}
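// Worked example (illustrative, not part of the file): continuing the
// x = 1.25 case from the comment above, with x stored as Q1.30 raw
// 0x50000000 and x_integer_digits = 1, CountLeadingZeros returns 1 and
// *num_bits_over_unit = 1 - 1 = 0. The shifted value 0xA0000000 minus 2^31
// encodes f = 0.25 in Q0.31, one_over_one_plus_x_for_x_in_0_1 returns
// 1 / 1.25 = 0.8 as Q0.31 raw (about 1717986918), and the reciprocal is
// recovered as (raw / 2^31) * 2^-num_bits_over_unit = 0.8.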
+
+inline void GetInvSqrtQuantizedMultiplierExp(int32_t input, int reverse_shift,
+                                             int32_t* output_inv_sqrt,
+                                             int* output_shift) {
+  TFLITE_DCHECK_GE(input, 0);
+  if (input <= 1) {
+    // Handle the input value 1 separately to avoid overflow in that case
+    // in the general computation below (b/143972021). Also handle 0 as if it
+    // were a 1. 0 is an invalid input here (divide by zero) and 1 is a valid
+    // but rare/unrealistic input value. We can expect both to occur in some
+    // incompletely trained models, but probably not in fully trained models.
+    *output_inv_sqrt = std::numeric_limits<std::int32_t>::max();
+    *output_shift = 0;
+    return;
+  }
+  TFLITE_DCHECK_GT(input, 1);
+  *output_shift = 11;
+  while (input >= (1 << 29)) {
+    input /= 4;
+    ++*output_shift;
+  }
+  const unsigned max_left_shift_bits =
+      CountLeadingZeros(static_cast<uint32_t>(input)) - 1;
+  const unsigned max_left_shift_bit_pairs = max_left_shift_bits / 2;
+  const unsigned left_shift_bit_pairs = max_left_shift_bit_pairs - 1;
+  *output_shift -= left_shift_bit_pairs;
+  input <<= 2 * left_shift_bit_pairs;
+  TFLITE_DCHECK_GE(input, (1 << 27));
+  TFLITE_DCHECK_LT(input, (1 << 29));
+  using gemmlowp::FixedPoint;
+  using gemmlowp::Rescale;
+  using gemmlowp::SaturatingRoundingMultiplyByPOT;
+  // Using 3 integer bits gives us enough room for the internal arithmetic in
+  // this Newton-Raphson iteration.
+  using F3 = FixedPoint<int32_t, 3>;
+  using F0 = FixedPoint<int32_t, 0>;
+  const F3 fixedpoint_input = F3::FromRaw(input >> 1);
+  const F3 fixedpoint_half_input =
+      SaturatingRoundingMultiplyByPOT<-1>(fixedpoint_input);
+  const F3 fixedpoint_half_three =
+      GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F3, (1 << 28) + (1 << 27), 1.5);
+  // Newton-Raphson iteration
+  // Naive unoptimized starting guess: x = 1
+  F3 x = F3::One();
+  // Naive unoptimized number of iterations: 5
+  for (int i = 0; i < 5; i++) {
+    const F3 x3 = Rescale<3>(x * x * x);
+    x = Rescale<3>(fixedpoint_half_three * x - fixedpoint_half_input * x3);
+  }
+  const F0 fixedpoint_half_sqrt_2 =
+      GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F0, 1518500250, std::sqrt(2.) / 2.);
+  x = x * fixedpoint_half_sqrt_2;
+  *output_inv_sqrt = x.raw();
+  if (*output_shift < 0) {
+    *output_inv_sqrt <<= -*output_shift;
+    *output_shift = 0;
+  }
+  // Convert right shift (right is positive) to left shift.
+  *output_shift *= reverse_shift;
+}
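// A float-only sketch (illustrative, not part of the file) of the
// Newton-Raphson recurrence used above, x <- x * (1.5 - 0.5 * a * x^2),
// which converges to 1/sqrt(a). After the normalization above, the operand
// viewed as a Q3.28-style value lies roughly in [0.25, 1), where the naive
// starting guess x = 1 is well inside the basin of convergence.
inline float InvSqrtNewtonSketch(float a) {
  float x = 1.0f;                       // same naive starting guess
  for (int i = 0; i < 5; ++i) {         // same iteration count as above
    x = x * (1.5f - 0.5f * a * x * x);  // Newton step for f(x) = 1/x^2 - a
  }
  return x;  // e.g. InvSqrtNewtonSketch(0.5f) ~= 1.41421 == 1/sqrt(0.5)
}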
+
+// DO NOT USE THIS STRUCT FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING
+// BROADCASTING.
+//
+// NdArrayDesc<N> describes the shape and memory layout of an N-dimensional
+// rectangular array of numbers.
+//
+// NdArrayDesc<N> is basically identical to Dims<N> defined in types.h.
+// However, as Dims<N> is to be deprecated, this class exists as an adaptor
+// to enable simple unoptimized implementations of element-wise broadcasting
+// operations.
+template <int N>
+struct NdArrayDesc {
+  // The "extent" of each dimension. Indices along dimension d must be in the
+  // half-open interval [0, extents[d]).
+  int extents[N];
+
+  // The number of *elements* (not bytes) between consecutive indices of each
+  // dimension.
+  int strides[N];
+};
+
+// DO NOT USE THIS FUNCTION FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING
+// BROADCASTING.
+//
+// Same as Offset(), except takes as NdArrayDesc<N> instead of Dims<N>.
+inline int SubscriptToIndex(const NdArrayDesc<4>& desc, int i0, int i1, int i2,
+                            int i3) {
+  TFLITE_DCHECK(i0 >= 0 && i0 < desc.extents[0]);
+  TFLITE_DCHECK(i1 >= 0 && i1 < desc.extents[1]);
+  TFLITE_DCHECK(i2 >= 0 && i2 < desc.extents[2]);
+  TFLITE_DCHECK(i3 >= 0 && i3 < desc.extents[3]);
+  return i0 * desc.strides[0] + i1 * desc.strides[1] + i2 * desc.strides[2] +
+         i3 * desc.strides[3];
+}
+
+inline int SubscriptToIndex(const NdArrayDesc<5>& desc, int indexes[5]) {
+  return indexes[0] * desc.strides[0] + indexes[1] * desc.strides[1] +
+         indexes[2] * desc.strides[2] + indexes[3] * desc.strides[3] +
+         indexes[4] * desc.strides[4];
+}
+
+// Given the dimensions of the operands for an element-wise binary broadcast,
+// adjusts them so that they can be directly iterated over with simple loops.
+// Returns the adjusted dims as instances of NdArrayDesc in 'desc0_out' and
+// 'desc1_out'. 'desc0_out' and 'desc1_out' cannot be nullptr.
+//
+// This function assumes that the two input shapes are compatible up to
+// broadcasting and the shorter one has already been prepended with 1s to be the
+// same length. E.g., if shape0 is (1, 16, 16, 64) and shape1 is (1, 64),
+// shape1 must already have been prepended to be (1, 1, 1, 64). Recall that
+// Dims<N> refer to shapes in reverse order. In this case, input0_dims will be
+// (64, 16, 16, 1) and input1_dims will be (64, 1, 1, 1).
+//
+// When two shapes are compatible up to broadcasting, for each dimension d,
+// the input extents are either equal, or one of them is 1.
+//
+// This function performs the following for each dimension d:
+// - If the extents are equal, then do nothing since the loop that walks over
+//   both of the input arrays is correct.
+// - Otherwise, one (and only one) of the extents must be 1. Say extent0 is 1
+//   and extent1 is e1. Then set extent0 to e1 and stride0 *to 0*. This allows
+//   array0 to be referenced *at any index* in dimension d and still access the
+//   same slice.
+template <int N>
+inline void NdArrayDescsForElementwiseBroadcast(const Dims<N>& input0_dims,
+                                                const Dims<N>& input1_dims,
+                                                NdArrayDesc<N>* desc0_out,
+                                                NdArrayDesc<N>* desc1_out) {
+  TFLITE_DCHECK(desc0_out != nullptr);
+  TFLITE_DCHECK(desc1_out != nullptr);
+
+  // Copy dims to desc.
+  for (int i = 0; i < N; ++i) {
+    desc0_out->extents[i] = input0_dims.sizes[i];
+    desc0_out->strides[i] = input0_dims.strides[i];
+    desc1_out->extents[i] = input1_dims.sizes[i];
+    desc1_out->strides[i] = input1_dims.strides[i];
+  }
+
+  // Walk over each dimension. If the extents are equal do nothing.
+  // Otherwise, set the desc with extent 1 to have extent equal to the other and
+  // stride 0.
+  for (int i = 0; i < N; ++i) {
+    const int extent0 = ArraySize(input0_dims, i);
+    const int extent1 = ArraySize(input1_dims, i);
+    if (extent0 != extent1) {
+      if (extent0 == 1) {
+        desc0_out->strides[i] = 0;
+        desc0_out->extents[i] = extent1;
+      } else {
+        TFLITE_DCHECK_EQ(extent1, 1);
+        desc1_out->strides[i] = 0;
+        desc1_out->extents[i] = extent0;
+      }
+    }
+  }
+}
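// Standalone sketch (illustrative, not part of the file) of the stride-0
// trick set up above: the broadcast operand is indexed with stride 0 along
// the broadcast dimension, so every index there reads the same data. The
// shapes and values are invented.
inline void BroadcastAddRowsSketch(int out[6]) {
  const int a[6] = {1, 2, 3, 4, 5, 6};  // shape (2, 3), strides (3, 1)
  const int b[3] = {10, 20, 30};        // shape (1, 3) -> strides (0, 1)
  const int a_strides[2] = {3, 1};
  const int b_strides[2] = {0, 1};      // stride 0 == reuse the single row
  for (int i = 0; i < 2; ++i) {
    for (int j = 0; j < 3; ++j) {
      out[i * 3 + j] = a[i * a_strides[0] + j * a_strides[1]] +
                       b[i * b_strides[0] + j * b_strides[1]];
    }
  }
  // out == {11, 22, 33, 14, 25, 36}
}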
+
+// Copies dims to desc, calculating strides.
+template <int N>
+inline void CopyDimsToDesc(const RuntimeShape& input_shape,
+                           NdArrayDesc<N>* desc_out) {
+  int desc_stride = 1;
+  for (int i = N - 1; i >= 0; --i) {
+    desc_out->extents[i] = input_shape.Dims(i);
+    desc_out->strides[i] = desc_stride;
+    desc_stride *= input_shape.Dims(i);
+  }
+}
+
+template <int N>
+inline void NdArrayDescsForElementwiseBroadcast(
+    const RuntimeShape& input0_shape, const RuntimeShape& input1_shape,
+    NdArrayDesc<N>* desc0_out, NdArrayDesc<N>* desc1_out) {
+  TFLITE_DCHECK(desc0_out != nullptr);
+  TFLITE_DCHECK(desc1_out != nullptr);
+
+  auto extended_input0_shape = RuntimeShape::ExtendedShape(N, input0_shape);
+  auto extended_input1_shape = RuntimeShape::ExtendedShape(N, input1_shape);
+
+  // Copy dims to desc, calculating strides.
+  CopyDimsToDesc<N>(extended_input0_shape, desc0_out);
+  CopyDimsToDesc<N>(extended_input1_shape, desc1_out);
+
+  // Walk over each dimension. If the extents are equal do nothing.
+  // Otherwise, set the desc with extent 1 to have extent equal to the other and
+  // stride 0.
+  for (int i = 0; i < N; ++i) {
+    const int extent0 = extended_input0_shape.Dims(i);
+    const int extent1 = extended_input1_shape.Dims(i);
+    if (extent0 != extent1) {
+      if (extent0 == 1) {
+        desc0_out->strides[i] = 0;
+        desc0_out->extents[i] = extent1;
+      } else {
+        TFLITE_DCHECK_EQ(extent1, 1);
+        desc1_out->strides[i] = 0;
+        desc1_out->extents[i] = extent0;
+      }
+    }
+  }
+}
+
+template <int N>
+inline void NdArrayDescsForElementwiseBroadcast(
+    const RuntimeShape& input0_shape, const RuntimeShape& input1_shape,
+    const RuntimeShape& input2_shape, NdArrayDesc<N>* desc0_out,
+    NdArrayDesc<N>* desc1_out, NdArrayDesc<N>* desc2_out) {
+  TFLITE_DCHECK(desc0_out != nullptr);
+  TFLITE_DCHECK(desc1_out != nullptr);
+  TFLITE_DCHECK(desc2_out != nullptr);
+
+  auto extended_input0_shape = RuntimeShape::ExtendedShape(N, input0_shape);
+  auto extended_input1_shape = RuntimeShape::ExtendedShape(N, input1_shape);
+  auto extended_input2_shape = RuntimeShape::ExtendedShape(N, input2_shape);
+
+  // Copy dims to desc, calculating strides.
+  CopyDimsToDesc<N>(extended_input0_shape, desc0_out);
+  CopyDimsToDesc<N>(extended_input1_shape, desc1_out);
+  CopyDimsToDesc<N>(extended_input2_shape, desc2_out);
+
+  // Walk over each dimension. If the extents are equal do nothing.
+  // Otherwise, set the desc with extent 1 to have extent equal to the other and
+  // stride 0.
+  for (int i = 0; i < N; ++i) {
+    const int extent0 = extended_input0_shape.Dims(i);
+    const int extent1 = extended_input1_shape.Dims(i);
+    const int extent2 = extended_input2_shape.Dims(i);
+
+    int extent = extent0;
+    if (extent1 != 1) extent = extent1;
+    if (extent2 != 1) extent = extent2;
+
+    TFLITE_DCHECK(extent0 == 1 || extent0 == extent);
+    TFLITE_DCHECK(extent1 == 1 || extent1 == extent);
+    TFLITE_DCHECK(extent2 == 1 || extent2 == extent);
+
+    if (!(extent0 == extent1 && extent1 == extent2)) {
+      if (extent0 == 1) {
+        desc0_out->strides[i] = 0;
+        desc0_out->extents[i] = extent;
+      }
+      if (extent1 == 1) {
+        desc1_out->strides[i] = 0;
+        desc1_out->extents[i] = extent;
+      }
+      if (extent2 == 1) {
+        desc2_out->strides[i] = 0;
+        desc2_out->extents[i] = extent;
+      }
+    }
+  }
+}
+
+// Detailed implementation of NDOpsHelper; the indexes array must be
+// zero-initialized. This implementation is equivalent to N nested loops.
+// E.g., if N=4, it can be re-written as:
+// for (int b = 0; b < output.extents[0]; ++b) {
+//   for (int y = 0; y < output.extents[1]; ++y) {
+//     for (int x = 0; x < output.extents[2]; ++x) {
+//       for (int c = 0; c < output.extents[3]; ++c) {
+//           calc({b,y,x,c});
+//       }
+//     }
+//   }
+// }
+template <int N, int DIM, typename Calc>
+typename std::enable_if<DIM != N - 1, void>::type NDOpsHelperImpl(
+    const NdArrayDesc<N>& output, const Calc& calc, int indexes[N]) {
+  for (indexes[DIM] = 0; indexes[DIM] < output.extents[DIM]; ++indexes[DIM]) {
+    NDOpsHelperImpl<N, DIM + 1, Calc>(output, calc, indexes);
+  }
+}
+
+template <int N, int DIM, typename Calc>
+typename std::enable_if<DIM == N - 1, void>::type NDOpsHelperImpl(
+    const NdArrayDesc<N>& output, const Calc& calc, int indexes[N]) {
+  for (indexes[DIM] = 0; indexes[DIM] < output.extents[DIM]; ++indexes[DIM]) {
+    calc(indexes);
+  }
+}
+
+// Execute the calc function in the innermost iteration based on the shape of
+// the output. The calc function should take a single argument of type int[N].
+template <int N, typename Calc>
+inline void NDOpsHelper(const NdArrayDesc<N>& output, const Calc& calc) {
+  int indexes[N] = {0};
+  NDOpsHelperImpl<N, 0, Calc>(output, calc, indexes);
+}
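// Usage sketch (illustrative, not part of the file): filling a 2x3 buffer
// with NDOpsHelper and a lambda. The descriptor values are invented; only
// the extents drive the iteration.
inline void NDOpsHelperSketch() {
  NdArrayDesc<2> out_desc;
  out_desc.extents[0] = 2;
  out_desc.extents[1] = 3;
  out_desc.strides[0] = 3;
  out_desc.strides[1] = 1;
  int data[6];
  auto calc = [&](int indexes[2]) {
    data[indexes[0] * 3 + indexes[1]] = indexes[0] * 10 + indexes[1];
  };
  NDOpsHelper<2>(out_desc, calc);
  // data == {0, 1, 2, 10, 11, 12}
}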
+// Copied from gemmlowp::RoundDown when we dropped direct dependency on
+// gemmlowp.
+//
+// Returns the runtime argument rounded down to the nearest multiple of
+// the fixed Modulus.
+template <unsigned Modulus, typename Integer>
+Integer RoundDown(Integer i) {
+  return i - (i % Modulus);
+}
+
+// Copied from gemmlowp::RoundUp when we dropped direct dependency on
+// gemmlowp.
+//
+// Returns the runtime argument rounded up to the nearest multiple of
+// the fixed Modulus.
+template <unsigned Modulus, typename Integer>
+Integer RoundUp(Integer i) {
+  return RoundDown<Modulus>(i + Modulus - 1);
+}
+
+// Copied from gemmlowp::CeilQuotient when we dropped direct dependency on
+// gemmlowp.
+//
+// Returns the quotient a / b rounded up ('ceil') to the nearest integer.
+template <typename Integer>
+Integer CeilQuotient(Integer a, Integer b) {
+  return (a + b - 1) / b;
+}
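// Quick sanity examples (illustrative) for the three helpers above:
//   RoundDown<8>(29)    == 24   (29 - 29 % 8)
//   RoundUp<8>(29)      == 32   (RoundDown<8>(29 + 7))
//   CeilQuotient(29, 8) == 4    ((29 + 7) / 8)
// For positive i, RoundUp<M>(i) == M * CeilQuotient(i, M).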
+
+// This function is a copy of gemmlowp::HowManyThreads, copied when we dropped
+// the direct dependency of internal/optimized/ on gemmlowp.
+//
+// It computes a reasonable number of threads to use for a GEMM of shape
+// (rows, cols, depth).
+//
+// TODO(b/131910176): get rid of this function by switching each call site
+// to its own more sensible logic for its own workload.
+template <int KernelRows>
+inline int LegacyHowManyThreads(int max_num_threads, int rows, int cols,
+                                int depth) {
+  // Early-exit in the default case where multi-threading is disabled.
+  if (max_num_threads == 1) {
+    return 1;
+  }
+
+  // Ensure that each thread has KernelRows rows to process, if at all possible.
+  int thread_count = std::min(max_num_threads, rows / KernelRows);
+
+  // Limit the number of threads according to the overall size of the problem.
+  if (thread_count > 1) {
+    // Empirically determined value.
+    static constexpr std::uint64_t min_cubic_size_per_thread = 64 * 1024;
+
+    // In 32-bit arithmetic only two of the three sizes could be multiplied
+    // without risking overflow, so widen to 64 bits before taking the full
+    // product.
+    const std::uint64_t cubic_size =
+        std::uint64_t(rows) * std::uint64_t(cols) * std::uint64_t(depth);
+
+    thread_count = std::min(
+        thread_count, static_cast<int>(cubic_size / min_cubic_size_per_thread));
+  }
+
+  if (thread_count < 1) {
+    thread_count = 1;
+  }
+
+  assert(thread_count > 0 && thread_count <= max_num_threads);
+  return thread_count;
+}
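// Worked example (illustrative) for LegacyHowManyThreads<4> with
// max_num_threads = 8, rows = 32, cols = 32, depth = 32:
//   rows / KernelRows = 8, so thread_count starts at min(8, 8) = 8;
//   cubic_size = 32 * 32 * 32 = 32768 < 64 * 1024, so thread_count becomes
//   min(8, 32768 / 65536) = 0, which the final clamp raises back to 1.
// Small problems therefore stay single-threaded even when rows are plentiful.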
+
+template <typename T>
+void optimized_ops_preload_l1_stream(const T* ptr) {
+#ifdef __GNUC__
+  // builtin offered by GCC-compatible compilers including clang
+  __builtin_prefetch(ptr, /* 0 means read */ 0, /* 0 means no locality */ 0);
+#else
+  (void)ptr;
+#endif
+}
+
+template <typename T>
+void optimized_ops_preload_l1_keep(const T* ptr) {
+#ifdef __GNUC__
+  // builtin offered by GCC-compatible compilers including clang
+  __builtin_prefetch(ptr, /* 0 means read */ 0, /* 3 means high locality */ 3);
+#else
+  (void)ptr;
+#endif
+}
+
+template <typename T>
+void optimized_ops_prefetch_write_l1_keep(const T* ptr) {
+#ifdef __GNUC__
+  // builtin offered by GCC-compatible compilers including clang
+  __builtin_prefetch(ptr, /* 1 means write */ 1, /* 3 means high locality */ 3);
+#else
+  (void)ptr;
+#endif
+}
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_COMMON_H_

+ 112 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/compatibility.h

@@ -0,0 +1,112 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_COMPATIBILITY_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_COMPATIBILITY_H_
+
+#include <cstdint>
+
+#include "tensorflow/lite/kernels/op_macros.h"
+
+#ifndef TFLITE_DCHECK
+#define TFLITE_DCHECK(condition) (condition) ? (void)0 : TFLITE_ASSERT_FALSE
+#endif
+
+#ifndef TFLITE_DCHECK_EQ
+#define TFLITE_DCHECK_EQ(x, y) ((x) == (y)) ? (void)0 : TFLITE_ASSERT_FALSE
+#endif
+
+#ifndef TFLITE_DCHECK_NE
+#define TFLITE_DCHECK_NE(x, y) ((x) != (y)) ? (void)0 : TFLITE_ASSERT_FALSE
+#endif
+
+#ifndef TFLITE_DCHECK_GE
+#define TFLITE_DCHECK_GE(x, y) ((x) >= (y)) ? (void)0 : TFLITE_ASSERT_FALSE
+#endif
+
+#ifndef TFLITE_DCHECK_GT
+#define TFLITE_DCHECK_GT(x, y) ((x) > (y)) ? (void)0 : TFLITE_ASSERT_FALSE
+#endif
+
+#ifndef TFLITE_DCHECK_LE
+#define TFLITE_DCHECK_LE(x, y) ((x) <= (y)) ? (void)0 : TFLITE_ASSERT_FALSE
+#endif
+
+#ifndef TFLITE_DCHECK_LT
+#define TFLITE_DCHECK_LT(x, y) ((x) < (y)) ? (void)0 : TFLITE_ASSERT_FALSE
+#endif
+
+// TODO(ahentz): Clean up: We should stick to the DCHECK versions.
+#ifndef TFLITE_CHECK
+#define TFLITE_CHECK(condition) (condition) ? (void)0 : TFLITE_ABORT
+#endif
+
+#ifndef TFLITE_CHECK_EQ
+#define TFLITE_CHECK_EQ(x, y) ((x) == (y)) ? (void)0 : TFLITE_ABORT
+#endif
+
+#ifndef TFLITE_CHECK_NE
+#define TFLITE_CHECK_NE(x, y) ((x) != (y)) ? (void)0 : TFLITE_ABORT
+#endif
+
+#ifndef TFLITE_CHECK_GE
+#define TFLITE_CHECK_GE(x, y) ((x) >= (y)) ? (void)0 : TFLITE_ABORT
+#endif
+
+#ifndef TFLITE_CHECK_GT
+#define TFLITE_CHECK_GT(x, y) ((x) > (y)) ? (void)0 : TFLITE_ABORT
+#endif
+
+#ifndef TFLITE_CHECK_LE
+#define TFLITE_CHECK_LE(x, y) ((x) <= (y)) ? (void)0 : TFLITE_ABORT
+#endif
+
+#ifndef TFLITE_CHECK_LT
+#define TFLITE_CHECK_LT(x, y) ((x) < (y)) ? (void)0 : TFLITE_ABORT
+#endif
+
+#ifndef TF_LITE_STATIC_MEMORY
+// TODO(b/162019032): Consider removing these type-aliases.
+using int8 = std::int8_t;
+using uint8 = std::uint8_t;
+using int16 = std::int16_t;
+using uint16 = std::uint16_t;
+using int32 = std::int32_t;
+using uint32 = std::uint32_t;
+#endif  // !defined(TF_LITE_STATIC_MEMORY)
+
+// TFLITE_DEPRECATED()
+//
+// Duplicated from absl/base/macros.h to avoid pulling in that library.
+// Marks a deprecated class, struct, enum, function, method and variable
+// declarations. The macro argument is used as a custom diagnostic message (e.g.
+// suggestion of a better alternative).
+//
+// Example:
+//
+//   class TFLITE_DEPRECATED("Use Bar instead") Foo {...};
+//   TFLITE_DEPRECATED("Use Baz instead") void Bar() {...}
+//
+// Every usage of a deprecated entity will trigger a warning when compiled with
+// clang's `-Wdeprecated-declarations` option. This option is turned off by
+// default, but the warnings will be reported by clang-tidy.
+#if defined(__clang__) && __cplusplus >= 201103L
+#define TFLITE_DEPRECATED(message) __attribute__((deprecated(message)))
+#endif
+
+#ifndef TFLITE_DEPRECATED
+#define TFLITE_DEPRECATED(message)
+#endif
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_COMPATIBILITY_H_
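// Usage note (illustrative): unlike assert(), these macros expand to a bare
// ternary expression; e.g. TFLITE_DCHECK_GE(input, 0) becomes
//   ((input) >= (0)) ? (void)0 : TFLITE_ASSERT_FALSE
// so each argument is evaluated exactly once, and a failing check funnels
// into TFLITE_ASSERT_FALSE / TFLITE_ABORT from op_macros.h:
//   TFLITE_DCHECK_GE(input, 0);
//   TFLITE_CHECK(buffer != nullptr);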

+ 40 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/cppmath.h

@@ -0,0 +1,40 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_CPPMATH_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_CPPMATH_H_
+
+#include <cmath>
+
+namespace tflite {
+
+#if defined(TF_LITE_USE_GLOBAL_CMATH_FUNCTIONS) ||                           \
+    (defined(__ANDROID__) && !defined(__NDK_MAJOR__)) || defined(ARDUINO) || \
+    defined(__ZEPHYR__)
+#define TF_LITE_GLOBAL_STD_PREFIX
+#else
+// Note: this port leaves the prefix empty in both branches so that the
+// global-namespace <math.h> functions are used unconditionally (upstream
+// TensorFlow uses `std` here).
+#define TF_LITE_GLOBAL_STD_PREFIX
+#endif
+
+#define DECLARE_STD_GLOBAL_SWITCH1(tf_name, std_name) \
+  template <class T>                                  \
+  inline T tf_name(const T x) {                       \
+    return TF_LITE_GLOBAL_STD_PREFIX::std_name(x);    \
+  }
+
+DECLARE_STD_GLOBAL_SWITCH1(TfLiteRound, round);
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_CPPMATH_H_
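// Expansion sketch (illustrative): DECLARE_STD_GLOBAL_SWITCH1(TfLiteRound,
// round) generates
//   template <class T>
//   inline T TfLiteRound(const T x) { return ::round(x); }
// With the prefix left empty in both branches of this port, the call always
// resolves to the global-namespace round() from <math.h>; upstream resolves
// to std::round on standard toolchains.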

+ 35 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/max.h

@@ -0,0 +1,35 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_MAX_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_MAX_H_
+
+#include <cmath>
+
+namespace tflite {
+
+#if defined(TF_LITE_USE_GLOBAL_MAX) || defined(__ZEPHYR__)
+inline float TfLiteMax(const float& x, const float& y) {
+  return ::max(x, y);
+}
+#else
+template <class T>
+inline T TfLiteMax(const T& x, const T& y) {
+  return ::fmax(x, y);
+}
+#endif
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_MAX_H_

+ 35 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/min.h

@@ -0,0 +1,35 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_MIN_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_MIN_H_
+
+#include <cmath>
+
+namespace tflite {
+
+#if defined(TF_LITE_USE_GLOBAL_MIN) || defined(__ZEPHYR__)
+inline float TfLiteMin(const float& x, const float& y) {
+  return ::min(x, y);
+}
+#else
+template <class T>
+inline T TfLiteMin(const T& x, const T& y) {
+  return ::fmin(x, y);
+}
+#endif
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_MIN_H_
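// Usage sketch (illustrative, assumes both max.h and min.h above are
// included): clamping a value with the two helpers; the name is invented.
inline float ClampSketch(float v, float lo, float hi) {
  return tflite::TfLiteMin(tflite::TfLiteMax(v, lo), hi);
}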

+ 40 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/optimized/neon_check.h

@@ -0,0 +1,40 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_
+
+#if defined(__ARM_NEON__) || defined(__ARM_NEON)
+#define USE_NEON
+#include <arm_neon.h>
+#endif
+
+#if defined __GNUC__ && defined __SSE4_1__ && !defined TF_LITE_DISABLE_X86_NEON
+#define USE_NEON
+#include "NEON_2_SSE.h"
+#endif
+
+// NEON_OR_PORTABLE(SomeFunc, args) calls NeonSomeFunc(args) if USE_NEON is
+// defined, PortableSomeFunc(args) otherwise.
+#ifdef USE_NEON
+// Always use Neon code
+#define NEON_OR_PORTABLE(funcname, ...) Neon##funcname(__VA_ARGS__)
+
+#else
+// No NEON available: Use Portable code
+#define NEON_OR_PORTABLE(funcname, ...) Portable##funcname(__VA_ARGS__)
+
+#endif  // defined(USE_NEON)
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_
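// Usage sketch (illustrative): a kernel written against this macro supplies
// two entry points and dispatches through NEON_OR_PORTABLE. The function
// names are invented for the example.
//   void NeonScaleVector(const float* in, float* out, int n);
//   void PortableScaleVector(const float* in, float* out, int n);
//   ...
//   NEON_OR_PORTABLE(ScaleVector, in, out, n);
// compiles to NeonScaleVector(in, out, n) when USE_NEON is defined and to
// PortableScaleVector(in, out, n) otherwise.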

+ 395 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/quantization_util.cc

@@ -0,0 +1,395 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/kernels/internal/quantization_util.h"
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+
+#include "tensorflow/lite/kernels/internal/compatibility.h"
+#include "tensorflow/lite/kernels/internal/cppmath.h"
+
+namespace tflite {
+
+namespace {
+// These constants are used to manipulate the binary representation of doubles.
+// Double-precision binary64 floating point format is:
+// Bit |  63  |  62-52   |   51-0   |
+//     | Sign | Exponent | Fraction |
+// To avoid 64-bit integers as much as possible, I break this into high and
+// low 32-bit chunks. High is:
+// Bit |  31  |  30-20   |      19-0     |
+//     | Sign | Exponent | High Fraction |
+// Low is:
+// Bit |     31-0     |
+//     | Low Fraction |
+// We then access the components through logical bit-wise operations to
+// extract the parts needed, with the positions and masks derived from the
+// layout shown above.
+constexpr uint64_t kSignMask = 0x8000000000000000LL;
+constexpr uint64_t kExponentMask = 0x7ff0000000000000LL;
+constexpr int32_t kExponentShift = 52;
+constexpr int32_t kExponentBias = 1023;
+constexpr uint32_t kExponentIsBadNum = 0x7ff;
+constexpr uint64_t kFractionMask = 0x000fffffffc00000LL;
+constexpr uint32_t kFractionShift = 22;
+constexpr uint32_t kFractionRoundingMask = 0x003fffff;
+constexpr uint32_t kFractionRoundingThreshold = 0x00200000;
+}  // namespace
+
+void QuantizeMultiplier(double double_multiplier, int32_t* quantized_multiplier,
+                        int* shift) {
+  if (double_multiplier == 0.) {
+    *quantized_multiplier = 0;
+    *shift = 0;
+    return;
+  }
+#ifdef TFLITE_EMULATE_FLOAT
+  // If we're trying to avoid the use of floating-point instructions (for
+  // example on microcontrollers) then use an alternative implementation
+  // that only requires integer and bitwise operations. To enable this, you
+  // need to set the define during the build process for your platform.
+  int64_t q_fixed = IntegerFrExp(double_multiplier, shift);
+#else   // TFLITE_EMULATE_FLOAT
+  const double q = std::frexp(double_multiplier, shift);
+  auto q_fixed = static_cast<int64_t>(TfLiteRound(q * (1ll << 31)));
+#endif  // TFLITE_EMULATE_FLOAT
+  TFLITE_CHECK(q_fixed <= (1ll << 31));
+  if (q_fixed == (1ll << 31)) {
+    q_fixed /= 2;
+    ++*shift;
+  }
+  TFLITE_CHECK_LE(q_fixed, std::numeric_limits<int32_t>::max());
+  // A shift amount smaller than -31 would cause all bits to be shifted out
+  // and thus all results would be zero. We implement that instead with
+  // q_fixed==0, so as to avoid hitting issues with right-shift
+  // operations with shift amounts greater than 31. Note that this happens
+  // roughly when abs(double_multiplier) < 2^-31 and the present handling means
+  // that we're effectively flushing tiny double_multiplier's to zero.
+  // We could conceivably handle values in the range (roughly) [32, 63]
+  // as 'denormals' i.e. (shift==0, q_fixed < 2^30). In that point of view
+  // the present handling is just doing 'flush denormals to zero'. We could
+  // reconsider and actually generate nonzero denormals if a need arises.
+  if (*shift < -31) {
+    *shift = 0;
+    q_fixed = 0;
+  }
+  *quantized_multiplier = static_cast<int32_t>(q_fixed);
+}
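// Worked example (illustrative): QuantizeMultiplier(0.75, &m, &s) takes
// frexp(0.75) = (0.75, 0), so s = 0 and m = round(0.75 * 2^31) = 1610612736;
// the runtime multiply is then (x * m) >> 31 with no extra shift. For
// 0.375 = 0.75 * 2^-1 the mantissa is identical and s = -1, i.e. one more
// right shift at runtime.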
+
+void QuantizeMultiplierGreaterThanOne(double double_multiplier,
+                                      int32_t* quantized_multiplier,
+                                      int* left_shift) {
+  TFLITE_CHECK_GT(double_multiplier, 1.);
+  QuantizeMultiplier(double_multiplier, quantized_multiplier, left_shift);
+  TFLITE_CHECK_GE(*left_shift, 0);
+}
+
+void QuantizeMultiplierSmallerThanOneExp(double double_multiplier,
+                                         int32_t* quantized_multiplier,
+                                         int* left_shift) {
+  TFLITE_CHECK_LT(double_multiplier, 1.);
+  TFLITE_CHECK_GT(double_multiplier, 0.);
+  int shift;
+  QuantizeMultiplier(double_multiplier, quantized_multiplier, &shift);
+  TFLITE_CHECK_LE(shift, 0);
+  *left_shift = shift;
+}
+
+int64_t IntegerFrExp(double input, int* shift) {
+  // Make sure our assumptions about the double layout hold.
+  TFLITE_CHECK_EQ(8, sizeof(double));
+
+  // We want to access the bits of the input double value directly, which is
+  // tricky to do safely, so use a union to handle the casting.
+  union {
+    double double_value;
+    uint64_t double_as_uint;
+  } cast_union;
+  cast_union.double_value = input;
+  const uint64_t u = cast_union.double_as_uint;
+
+  // If the bitfield is all zeros apart from the sign bit, this is a normalized
+  // zero value, so return standard values for this special case.
+  if ((u & ~kSignMask) == 0) {
+    *shift = 0;
+    return 0;
+  }
+
+  // Deal with NaNs and Infs, which are always indicated with a fixed pattern
+  // in the exponent, and distinguished by whether the fraction is zero or
+  // non-zero.
+  const uint32_t exponent_part = ((u & kExponentMask) >> kExponentShift);
+  if (exponent_part == kExponentIsBadNum) {
+    *shift = std::numeric_limits<int>::max();
+    if (u & kFractionMask) {
+      // NaN, so just return zero (with the exponent set to INT_MAX).
+      return 0;
+    } else {
+      // Infinity, so return +/- INT_MAX.
+      if (u & kSignMask) {
+        return std::numeric_limits<int64_t>::min();
+      } else {
+        return std::numeric_limits<int64_t>::max();
+      }
+    }
+  }
+
+  // The shift is fairly easy to extract from the high bits of the double value,
+  // just by masking it out and applying a bias. The std::frexp() implementation
+  // always returns values between 0.5 and 1.0 though, whereas the exponent
+  // assumes 1.0 to 2.0 is the standard range, so I add on one to match that
+  // interface.
+  *shift = (exponent_part - kExponentBias) + 1;
+
+  // There's an implicit high bit in the double format definition, so make sure
+  // we include that at the top, and then reconstruct the rest of the fractional
+  // value from the remaining fragments.
+  int64_t fraction = 0x40000000 + ((u & kFractionMask) >> kFractionShift);
+
+  // We're cutting off some bits at the bottom, so to exactly match the standard
+  // frexp implementation here we'll apply rounding by adding one to the least
+  // significant bit of the result if the discarded portion is over half of the
+  // maximum.
+  if ((u & kFractionRoundingMask) > kFractionRoundingThreshold) {
+    fraction += 1;
+  }
+  // Negate the fraction if the sign bit was set.
+  if (u & kSignMask) {
+    fraction *= -1;
+  }
+
+  return fraction;
+}
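// Worked example (illustrative): for the input 1.5 the exponent field is
// 1023 and only the top fraction bit is set, so *shift = (1023 - 1023) + 1
// = 1 and the returned fraction is 0x40000000 + 0x20000000 = 0x60000000.
// As a proportion of 1 << 31 that is 0.75, so the pair encodes
// 0.75 * 2^1 = 1.5, matching std::frexp(1.5) = 0.75 with exponent 1.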
+
+double DoubleFromFractionAndShift(int64_t fraction, int shift) {
+  union {
+    double double_value;
+    uint64_t double_as_uint;
+  } result;
+
+  // Detect NaNs and infinities.
+  if (shift == std::numeric_limits<int>::max()) {
+    if (fraction == 0) {
+      return std::numeric_limits<double>::quiet_NaN();
+    } else if (fraction > 0) {
+      return std::numeric_limits<double>::infinity();
+    } else {
+      return -std::numeric_limits<double>::infinity();
+    }
+  }
+
+  // Return a normalized zero for a zero fraction.
+  if (fraction == 0) {
+    result.double_as_uint = 0;
+    return result.double_value;
+  }
+
+  bool is_negative = (fraction < 0);
+  int64_t encoded_fraction = is_negative ? -fraction : fraction;
+  int64_t encoded_shift = (shift - 1);
+  while (encoded_fraction < 0x40000000) {
+    encoded_fraction *= 2;
+    encoded_shift -= 1;
+  }
+  while (encoded_fraction > 0x80000000) {
+    encoded_fraction /= 2;
+    encoded_shift += 1;
+  }
+  encoded_fraction -= 0x40000000;
+  if (encoded_shift < -1022) {
+    encoded_shift = -1023;
+  } else if (encoded_shift > 1022) {
+    encoded_shift = 1023;
+  }
+  encoded_shift += kExponentBias;
+  uint64_t encoded_sign = is_negative ? kSignMask : 0;
+  result.double_as_uint = encoded_sign | (encoded_shift << kExponentShift) |
+                          (encoded_fraction << kFractionShift);
+  return result.double_value;
+}
+
+double IntegerDoubleMultiply(double a, double b) {
+  int a_shift;
+  const int64_t a_fraction = IntegerFrExp(a, &a_shift);
+  int b_shift;
+  const int64_t b_fraction = IntegerFrExp(b, &b_shift);
+  // Detect NaNs and infinities.
+  if (a_shift == std::numeric_limits<int>::max() ||
+      (b_shift == std::numeric_limits<int>::max())) {
+    return std::numeric_limits<double>::quiet_NaN();
+  }
+  const int result_shift = a_shift + b_shift + 1;
+  const int64_t result_fraction = (a_fraction * b_fraction) >> 32;
+  return DoubleFromFractionAndShift(result_fraction, result_shift);
+}
+
+int IntegerDoubleCompare(double a, double b) {
+  int a_shift;
+  const int64_t a_fraction = IntegerFrExp(a, &a_shift);
+  int b_shift;
+  const int64_t b_fraction = IntegerFrExp(b, &b_shift);
+
+  // Detect NaNs and infinities.
+  if (a_shift == std::numeric_limits<int>::max() ||
+      (b_shift == std::numeric_limits<int>::max())) {
+    return 1;
+  }
+
+  if ((a_fraction == 0) && (b_fraction < 0)) {
+    return 1;
+  } else if ((a_fraction < 0) && (b_fraction == 0)) {
+    return -1;
+  } else if (a_shift < b_shift) {
+    return -1;
+  } else if (a_shift > b_shift) {
+    return 1;
+  } else if (a_fraction < b_fraction) {
+    return -1;
+  } else if (a_fraction > b_fraction) {
+    return 1;
+  } else {
+    return 0;
+  }
+}
+
+void PreprocessSoftmaxScaling(double beta, double input_scale,
+                              int input_integer_bits,
+                              int32_t* quantized_multiplier, int* left_shift) {
+  // If the overall multiplier (input and beta) is large, then exp() of an
+  // input difference of 1 scaled by this will be large.  In other words, we
+  // can cap the multiplier and know that, when it is used, the output will be
+  // (round to) zero wherever the input is not at the maximum value.
+
+  // If the overall scale is less than one, and input_integer_bits=0, then the
+  // result is double equivalent of Q0.31 (actually with more precision). Thus
+  // this generates a Q(input_integer_bits).(31-input_integer_bits)
+  // representation.
+#ifdef TFLITE_EMULATE_FLOAT
+  const double input_beta = IntegerDoubleMultiply(beta, input_scale);
+  int shift;
+  int64_t fraction = IntegerFrExp(input_beta, &shift);
+  shift += (31 - input_integer_bits);
+  double input_beta_real_multiplier =
+      DoubleFromFractionAndShift(fraction, shift);
+  if (IntegerDoubleCompare(input_beta_real_multiplier, (1ll << 31) - 1.0) > 0) {
+    input_beta_real_multiplier = (1ll << 31) - 1.0;
+  }
+#else   // TFLITE_EMULATE_FLOAT
+  const double input_beta_real_multiplier = std::min(
+      beta * input_scale * (1 << (31 - input_integer_bits)), (1ll << 31) - 1.0);
+#endif  // TFLITE_EMULATE_FLOAT
+
+  QuantizeMultiplierGreaterThanOne(input_beta_real_multiplier,
+                                   quantized_multiplier, left_shift);
+}
+
+void PreprocessLogSoftmaxScalingExp(double beta, double input_scale,
+                                    int input_integer_bits,
+                                    int32_t* quantized_multiplier,
+                                    int* left_shift,
+                                    int32_t* reverse_scaling_divisor,
+                                    int* reverse_scaling_left_shift) {
+  PreprocessSoftmaxScaling(beta, input_scale, input_integer_bits,
+                           quantized_multiplier, left_shift);
+
+  // Also calculate what amounts to the inverse scaling factor for the input.
+  const double real_reverse_scaling_divisor =
+      (1 << (31 - *left_shift)) / static_cast<double>(*quantized_multiplier);
+  tflite::QuantizeMultiplierSmallerThanOneExp(real_reverse_scaling_divisor,
+                                              reverse_scaling_divisor,
+                                              reverse_scaling_left_shift);
+}
+
+int CalculateInputRadius(int input_integer_bits, int input_left_shift,
+                         int total_signed_bits) {
+#ifdef TFLITE_EMULATE_FLOAT
+  int64_t result = (1 << input_integer_bits) - 1;
+  result <<= (total_signed_bits - input_integer_bits);
+  result >>= input_left_shift;
+  return result;
+#else   // TFLITE_EMULATE_FLOAT
+  const double max_input_rescaled =
+      1.0 * ((1 << input_integer_bits) - 1) *
+      (1ll << (total_signed_bits - input_integer_bits)) /
+      (1ll << input_left_shift);
+  // Tighten bound using floor.  Suppose that we could use the exact value.
+  // After scaling the difference, the result would be at the maximum.  Thus we
+  // must ensure that our value has lower magnitude.
+  return static_cast<int>(std::floor(max_input_rescaled));
+#endif  // TFLITE_EMULATE_FLOAT
+}
+
+void NudgeQuantizationRange(const float min, const float max,
+                            const int quant_min, const int quant_max,
+                            float* nudged_min, float* nudged_max,
+                            float* nudged_scale) {
+  // This code originates from tensorflow/core/kernels/fake_quant_ops_functor.h.
+  const float quant_min_float = static_cast<float>(quant_min);
+  const float quant_max_float = static_cast<float>(quant_max);
+  *nudged_scale = (max - min) / (quant_max_float - quant_min_float);
+  const float zero_point_from_min = quant_min_float - min / *nudged_scale;
+  uint16_t nudged_zero_point;
+  if (zero_point_from_min < quant_min_float) {
+    nudged_zero_point = static_cast<uint16_t>(quant_min);
+  } else if (zero_point_from_min > quant_max_float) {
+    nudged_zero_point = static_cast<uint16_t>(quant_max);
+  } else {
+    nudged_zero_point = static_cast<uint16_t>(TfLiteRound(zero_point_from_min));
+  }
+  *nudged_min = (quant_min_float - nudged_zero_point) * (*nudged_scale);
+  *nudged_max = (quant_max_float - nudged_zero_point) * (*nudged_scale);
+}
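// Worked example (illustrative): NudgeQuantizationRange(-0.1f, 1.0f, 0, 255,
// ...) computes nudged_scale = 1.1 / 255 ~= 0.004314; zero_point_from_min =
// 0 - (-0.1 / 0.004314) ~= 23.18, which rounds to a nudged zero point of 23,
// giving nudged_min ~= -0.0992 and nudged_max ~= 1.0008. The range shifts
// slightly so that real 0.0 maps exactly to the integer code 23.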
+
+void FakeQuantizeArray(const float nudged_scale, const float nudged_min,
+                       const float nudged_max, const float* input_data,
+                       float* output_data, const float size) {
+  // This code originates from tensorflow/core/kernels/fake_quant_ops_functor.h.
+  const float inv_nudged_scale = 1.0f / nudged_scale;
+
+  for (int i = 0; i < size; i++) {
+    const float src_val = input_data[i];
+    const float clamped = std::min(nudged_max, std::max(nudged_min, src_val));
+    const float clamped_shifted = clamped - nudged_min;
+    const float dst_val =
+        TfLiteRound(clamped_shifted * inv_nudged_scale) * nudged_scale +
+        nudged_min;
+    output_data[i] = dst_val;
+  }
+}
+
+bool CheckedLog2(const float x, int* log2_result) {
+  // Using TfLiteRound instead of std::round and std::log instead of
+  // std::log2 to work around these functions being missing in a toolchain
+  // used in some TensorFlow tests as of May 2018.
+  const float x_log2 = std::log(x) * (1.0f / std::log(2.0f));
+  const float x_log2_rounded = TfLiteRound(x_log2);
+  const float x_log2_fracpart = x_log2 - x_log2_rounded;
+
+  *log2_result = static_cast<int>(x_log2_rounded);
+  return std::abs(x_log2_fracpart) < 1e-3f;
+}
+
+void QuantizeMultiplierArray(const double* effective_scales, size_t size,
+                             int32_t* effective_scale_significand,
+                             int* effective_shift) {
+  for (size_t i = 0; i < size; ++i) {
+    QuantizeMultiplier(effective_scales[i], &effective_scale_significand[i],
+                       &effective_shift[i]);
+  }
+}
+
+}  // namespace tflite

+ 292 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/quantization_util.h

@@ -0,0 +1,292 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_
+
+#include <cmath>
+#include <cstdint>
+#include <limits>
+
+#include "tensorflow/lite/kernels/internal/compatibility.h"
+#include "tensorflow/lite/kernels/internal/cppmath.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+
+// Given the min and max values of a float array, return
+// reasonable quantization parameters to use for this array.
+template <typename T>
+QuantizationParams ChooseQuantizationParams(double rmin, double rmax,
+                                            bool narrow_range) {
+  const T qmin = std::numeric_limits<T>::min() + (narrow_range ? 1 : 0);
+  const T qmax = std::numeric_limits<T>::max();
+  const double qmin_double = qmin;
+  const double qmax_double = qmax;
+  // 0 should always be a representable value. Let's assume that the initial
+  // min,max range contains 0.
+  TFLITE_CHECK_LE(rmin, 0.);
+  TFLITE_CHECK_GE(rmax, 0.);
+  if (rmin == rmax) {
+    // Special case where the min,max range is a point. Should be {0}.
+    TFLITE_CHECK_EQ(rmin, 0.);
+    TFLITE_CHECK_EQ(rmax, 0.);
+    QuantizationParams quantization_params;
+    quantization_params.zero_point = 0;
+    quantization_params.scale = 0.;
+    return quantization_params;
+  }
+
+  // General case.
+  //
+  // First determine the scale.
+  const double scale = (rmax - rmin) / (qmax_double - qmin_double);
+
+  // Zero-point computation.
+  // First the initial floating-point computation. The zero-point can be
+  // determined from solving an affine equation for any known pair
+  // (real value, corresponding quantized value).
+  // We know two such pairs: (rmin, qmin) and (rmax, qmax).
+  // The arithmetic error on the zero point computed from either pair
+  // will be roughly machine_epsilon * (sum of absolute values of terms)
+  // so we want to use the variant that adds the smaller terms.
+  const double zero_point_from_min = qmin_double - rmin / scale;
+  const double zero_point_from_max = qmax_double - rmax / scale;
+  const double zero_point_from_min_error =
+      std::abs(qmin_double) + std::abs(rmin / scale);
+  const double zero_point_from_max_error =
+      std::abs(qmax_double) + std::abs(rmax / scale);
+
+  const double zero_point_double =
+      zero_point_from_min_error < zero_point_from_max_error
+          ? zero_point_from_min
+          : zero_point_from_max;
+
+  // Now we need to nudge the zero point to be an integer
+  // (our zero points are integer, and this is motivated by the requirement
+  // to be able to represent the real value "0" exactly as a quantized value,
+  // which is required in multiple places, for example in Im2col with SAME
+  // padding).
+  T nudged_zero_point = 0;
+  if (zero_point_double < qmin_double) {
+    nudged_zero_point = qmin;
+  } else if (zero_point_double > qmax_double) {
+    nudged_zero_point = qmax;
+  } else {
+    nudged_zero_point = static_cast<T>(round(zero_point_double));
+  }
+  // The zero point should always be in the range of quantized value,
+  // [qmin, qmax].
+  TFLITE_CHECK_GE(nudged_zero_point, qmin);
+  TFLITE_CHECK_LE(nudged_zero_point, qmax);
+
+  // Finally, store the result nudged quantization params.
+  QuantizationParams quantization_params;
+  quantization_params.zero_point = nudged_zero_point;
+  quantization_params.scale = scale;
+  return quantization_params;
+}
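// Worked example (illustrative): ChooseQuantizationParams<uint8_t>(-1.0, 1.0,
// false) gives scale = 2.0 / 255 ~= 0.007843. Both candidate zero points
// evaluate to 127.5; the (rmin, qmin) pair wins on the error comparison, and
// round(127.5) = 128, so real 0.0 is represented exactly by the code 128.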
+
+template <typename T>
+QuantizationParams ChooseQuantizationParams(double rmin, double rmax) {
+  return ChooseQuantizationParams<T>(rmin, rmax, false);
+}
+
+// Converts a floating-point number to an integer. For all inputs x where
+// static_cast<IntOut>(x) is legal according to the C++ standard, the result
+// is identical to that cast (i.e. the result is x with its fractional part
+// truncated whenever that is representable as IntOut).
+//
+// static_cast would cause undefined behavior for the following cases, which
+// have well-defined behavior for this function:
+//
+//  1. If x is NaN, the result is zero.
+//
+//  2. If the truncated form of x is above the representable range of IntOut,
+//     the result is std::numeric_limits<IntOut>::max().
+//
+//  3. If the truncated form of x is below the representable range of IntOut,
+//     the result is std::numeric_limits<IntOut>::min().
+//
+// Note that cases #2 and #3 cover infinities as well as finite numbers.
+//
+// The range of FloatIn must include the range of IntOut, otherwise
+// the results are undefined.
+// TODO(sfeuz): Replace by absl::SafeCast once available.
+template <class IntOut, class FloatIn>
+IntOut SafeCast(FloatIn x) {
+  static_assert(!std::numeric_limits<FloatIn>::is_integer,
+                "FloatIn is integer");
+  static_assert(std::numeric_limits<IntOut>::is_integer,
+                "IntOut is not integer");
+  static_assert(std::numeric_limits<IntOut>::radix == 2, "IntOut is base 2");
+
+  // Special case NaN, for which the logic below doesn't work.
+  if (std::isnan(x)) {
+    return 0;
+  }
+
+  // Negative values all clip to zero for unsigned results.
+  if (!std::numeric_limits<IntOut>::is_signed && x < 0) {
+    return 0;
+  }
+
+  // Handle infinities.
+  if (std::isinf(x)) {
+    return x < 0 ? std::numeric_limits<IntOut>::min()
+                 : std::numeric_limits<IntOut>::max();
+  }
+
+  // Set exp such that x == f * 2^exp for some f with |f| in [0.5, 1.0),
+  // unless x is zero in which case exp == 0. Note that this implies that the
+  // magnitude of x is strictly less than 2^exp.
+  int exp = 0;
+  std::frexp(x, &exp);
+
+  // Let N be the number of non-sign bits in the representation of IntOut. If
+  // the magnitude of x is strictly less than 2^N, the truncated version of x
+  // is representable as IntOut. The only representable integer for which this
+  // is not the case is kMin for signed types (i.e. -2^N), but that is covered
+  // by the fall-through below.
+  if (exp <= std::numeric_limits<IntOut>::digits) {
+    return x;
+  }
+
+  // Handle numbers with magnitude >= 2^N.
+  return x < 0 ? std::numeric_limits<IntOut>::min()
+               : std::numeric_limits<IntOut>::max();
+}
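// Behavior sketch (illustrative) for SafeCast<int8_t> applied to floats:
//   SafeCast<int8_t>(3.9f)     == 3     (truncation, same as static_cast)
//   SafeCast<int8_t>(1000.0f)  == 127   (clamped instead of undefined)
//   SafeCast<int8_t>(-1000.0f) == -128
//   SafeCast<int8_t>(NAN)      == 0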
+
+// Decompose a double multiplier into a Q0.31 int32 representation of its
+// significand, and shift representation of NEGATIVE its exponent ---
+// this is intended as a RIGHT-shift.
+//
+// Restricted to the case where the multiplier < 1 (and non-negative).
+void QuantizeMultiplierSmallerThanOneExp(double double_multiplier,
+                                         int32_t* quantized_multiplier,
+                                         int* left_shift);
+
+// Decompose a double multiplier into a Q0.31 int32 representation of its
+// significand, and shift representation of its exponent.
+//
+// Restricted to the case where the multiplier > 1.
+void QuantizeMultiplierGreaterThanOne(double double_multiplier,
+                                      int32_t* quantized_multiplier,
+                                      int* left_shift);
+
+// Decompose a double multiplier into a Q0.31 int32 representation of its
+// significand, and shift representation of its exponent.
+//
+// Handles an arbitrary positive multiplier. The 'shift' output-value is
+// basically the 'floating-point exponent' of the multiplier:
+// Negative for a right-shift (when the multiplier is <1), positive for a
+// left-shift (when the multiplier is >1)
+void QuantizeMultiplier(double double_multiplier, int32_t* quantized_multiplier,
+                        int* shift);
+
+// Splits a double input value into a returned fraction, and a shift value from
+// the exponent, using only bitwise and integer operations to support
+// microcontrollers and other environments without floating-point support.
+//
+// This is designed to be a replacement for how std::frexp() is used within the
+// QuantizeMultiplier() function, and so has a different signature than the
+// standard version, returning a 64-bit integer rather than a double. This
+// result has a maximum value of 1<<31, with the fraction expressed as a
+// proportion of that maximum.
+//
+// std::frexp() returns NaNs and infinities unmodified, but since we're
+// returning integers that can't represent those values, instead we return
+// a shift of std::numeric_limits<int>::max() for all bad numbers, with an int64
+// result of 0 for NaNs, std::numeric_limits<int64_t>::max() for +INFINITY, and
+// std::numeric_limits<int64_t>::min() for -INFINITY. Denormalized inputs will
+// result in return values that end up truncating some bits at the end,
+// reflecting the loss of precision inherent in denormalization.
+int64_t IntegerFrExp(double input, int* shift);
+
+// Converts an integer fraction in the format produced by IntegerFrExp (where
+// 0x40000000 is 1.0) and an exponent shift (between -1022 and +1022) into an
+// IEEE binary64 double format result. The implementation uses only integer and
+// bitwise operators, so no floating point hardware support or emulation is
+// needed. This is here so quantized operations can run non-time-critical
+// preparation calculations on microcontrollers and other platforms without
+// float support.
+double DoubleFromFractionAndShift(int64_t fraction, int shift);
+
+// Performs a multiplication of two numbers in double format, using only integer
+// and bitwise instructions. This is aimed at supporting housekeeping functions
+// for quantized operations on microcontrollers without floating-point hardware.
+double IntegerDoubleMultiply(double a, double b);
+
+// Returns -1 if a is less than b, 0 if a and b are equal, and +1 if a is
+// greater than b. It is implemented using only integer and logical instructions
+// so that it can be easily run on microcontrollers for quantized operations.
+int IntegerDoubleCompare(double a, double b);
+
+// This first creates a multiplier in a double equivalent of
+// Q(input_integer_bits).(31-input_integer_bits) representation, with extra
+// precision in the double's fractional bits.  It then splits the result into
+// significand and exponent.
+void PreprocessSoftmaxScaling(double beta, double input_scale,
+                              int input_integer_bits,
+                              int32_t* quantized_multiplier, int* left_shift);
+// Like PreprocessSoftmaxScaling, but inverse scaling factors also calculated.
+void PreprocessLogSoftmaxScalingExp(double beta, double input_scale,
+                                    int input_integer_bits,
+                                    int32_t* quantized_multiplier,
+                                    int* left_shift,
+                                    int32_t* reverse_scaling_divisor,
+                                    int* reverse_scaling_left_shift);
+// Calculate the largest input that will result in a within-bounds intermediate
+// result within MultiplyByQuantizedMultiplierGreaterThanOne.  In other words,
+// it must not overflow before we reduce the value by multiplication by the
+// input multiplier.  The negative radius is used as the minimum difference in
+// Softmax.
+int CalculateInputRadius(int input_integer_bits, int input_left_shift,
+                         int total_signed_bits = 31);
+
+// Nudges a min/max quantization range to ensure zero is zero.
+// Gymnastics with nudged zero point is to ensure that real zero maps to
+// an integer, which is required for e.g. zero-padding in convolutional layers.
+// Outputs nudged_min, nudged_max, nudged_scale.
+void NudgeQuantizationRange(const float min, const float max,
+                            const int quant_min, const int quant_max,
+                            float* nudged_min, float* nudged_max,
+                            float* nudged_scale);
+
+// Fake quantizes (quantizes and dequantizes) input_data using the scale,
+// nudged_min, and nudged_max from NudgeQuantizationRange. This matches the code
+// in TensorFlow's FakeQuantizeWithMinMaxVarsFunctor.
+void FakeQuantizeArray(const float nudged_scale, const float nudged_min,
+                       const float nudged_max, const float* input_data,
+                       float* output_data, const float size);
+
+// If x is approximately a power of two (with any positive or negative
+// exponent), returns true and stores that exponent (i.e. log2(x)) in
+// *log2_result; otherwise returns false.
+bool CheckedLog2(const float x, int* log2_result);
+
+// Decomposes an array of double multipliers into a Q0.31 int32 representation
+// of each significand and a shift representation of each exponent.
+//
+// Handles an arbitrary multiplier. The 'shift' output-value is
+// basically the 'floating-point exponent' of the multiplier:
+// Negative for a right-shift (when the multiplier is <1), positive for a
+// left-shift (when the multiplier is >1)
+void QuantizeMultiplierArray(const double* effective_scales, size_t size,
+                             int32_t* effective_scale_significand,
+                             int* effective_shift);
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_
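To make the significand/shift contract above concrete, here is a minimal standalone sketch (hypothetical driver code, not part of this commit, assuming the header is reachable at its in-tree path) that decomposes two multipliers with QuantizeMultiplierArray:

    #include <cstdint>
    #include <cstdio>

    #include "tensorflow/lite/kernels/internal/quantization_util.h"

    int main() {
      const double scales[2] = {0.25, 1.5};
      int32_t significands[2];
      int shifts[2];
      tflite::QuantizeMultiplierArray(scales, 2, significands, shifts);
      // Expected: 0.25 == 0.5  * 2^-1 -> (0x40000000, -1)
      //           1.5  == 0.75 * 2^1  -> (0x60000000,  1)
      for (int i = 0; i < 2; ++i) {
        printf("%f -> 0x%08x, shift %d\n", scales[i],
               static_cast<unsigned>(significands[i]), shifts[i]);
      }
      return 0;
    }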

+ 454 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/add.h

@@ -0,0 +1,454 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_
+
+#include "fixedpoint/fixedpoint.h"
+#include "tensorflow/lite/kernels/internal/common.h"
+
+namespace tflite {
+
+namespace reference_ops {
+
+template <typename T>
+inline void Add(const ArithmeticParams& params,
+                const RuntimeShape& input1_shape, const T* input1_data,
+                const RuntimeShape& input2_shape, const T* input2_data,
+                const RuntimeShape& output_shape, T* output_data) {
+  const int flat_size =
+      MatchingElementsSize(input1_shape, input2_shape, output_shape);
+  for (int i = 0; i < flat_size; ++i) {
+    output_data[i] = ActivationFunctionWithMinMax(
+        input1_data[i] + input2_data[i], params.quantized_activation_min,
+        params.quantized_activation_max);
+  }
+}
+
+inline void Add(const ArithmeticParams& params,
+                const RuntimeShape& input1_shape, const float* input1_data,
+                const RuntimeShape& input2_shape, const float* input2_data,
+                const RuntimeShape& output_shape, float* output_data) {
+  const int flat_size =
+      MatchingElementsSize(input1_shape, input2_shape, output_shape);
+  for (int i = 0; i < flat_size; i++) {
+    auto x = input1_data[i] + input2_data[i];
+    output_data[i] = ActivationFunctionWithMinMax(
+        x, params.float_activation_min, params.float_activation_max);
+  }
+}
+
+// Element-wise add that can often be used for the inner loop of a broadcast
+// add as well as for the non-broadcast add.
+//
+// This function is used for 8-bit as well as for 16-bit, but the accumulator
+// is 32-bit in both cases. Overflow cannot occur thanks to the choice of the
+// shift (20 or 15, respectively - see add.cc for more comments).
+template <typename T>
+inline void AddElementwise(int size, const ArithmeticParams& params,
+                           const T* input1_data, const T* input2_data,
+                           T* output_data) {
+  TFLITE_DCHECK_GT(params.input1_offset, -std::numeric_limits<T>::max());
+  TFLITE_DCHECK_GT(params.input2_offset, -std::numeric_limits<T>::max());
+  TFLITE_DCHECK_LT(params.input1_offset, std::numeric_limits<T>::max());
+  TFLITE_DCHECK_LT(params.input2_offset, std::numeric_limits<T>::max());
+
+  for (int i = 0; i < size; ++i) {
+    const int32_t input1_val = params.input1_offset + input1_data[i];
+    const int32_t input2_val = params.input2_offset + input2_data[i];
+    const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
+    const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
+    const int32_t scaled_input1_val =
+        MultiplyByQuantizedMultiplierSmallerThanOneExp(
+            shifted_input1_val, params.input1_multiplier, params.input1_shift);
+    const int32_t scaled_input2_val =
+        MultiplyByQuantizedMultiplierSmallerThanOneExp(
+            shifted_input2_val, params.input2_multiplier, params.input2_shift);
+    const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
+    const int32_t raw_output =
+        MultiplyByQuantizedMultiplierSmallerThanOneExp(
+            raw_sum, params.output_multiplier, params.output_shift) +
+        params.output_offset;
+    const int32_t clamped_output =
+        std::min(params.quantized_activation_max,
+                 std::max(params.quantized_activation_min, raw_output));
+    output_data[i] = static_cast<T>(clamped_output);
+  }
+}
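With concrete numbers, the pipeline above looks like the following self-contained sketch. Everything here is hypothetical: MulQ31 is a rough stand-in for MultiplyByQuantizedMultiplierSmallerThanOneExp (it skips that helper's saturation and round-away-from-zero behavior), and the parameter values are made up.

    #include <cstdint>
    #include <cstdio>

    // Rough stand-in for MultiplyByQuantizedMultiplierSmallerThanOneExp:
    // multiply by a Q0.31 multiplier, then apply a rounding right shift of
    // -shift extra bits.
    int32_t MulQ31(int32_t x, int32_t mul, int shift) {
      const int total = 31 - shift;  // shift <= 0 on this code path
      const int64_t round = int64_t{1} << (total - 1);
      return static_cast<int32_t>(
          (static_cast<int64_t>(x) * mul + round) >> total);
    }

    int main() {
      // Hypothetical uint8 parameters: zero points 128, input scales s,
      // output scale 2s. Both input multipliers are then 0.5 (shift 0) and
      // the output multiplier is 0.5 with shift -19, undoing left_shift = 20.
      const int32_t offset = -128, left_shift = 20, half_q31 = 1 << 30;
      const int32_t in1 = 150, in2 = 100;
      const int32_t s1 = MulQ31((in1 + offset) * (1 << left_shift), half_q31, 0);
      const int32_t s2 = MulQ31((in2 + offset) * (1 << left_shift), half_q31, 0);
      const int32_t out = MulQ31(s1 + s2, half_q31, -19) + 128;
      // (150-128)*s + (100-128)*s = -6s, i.e. -3 in output units of 2s:
      printf("%d\n", out);  // 125
      return 0;
    }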
+
+// Scalar-broadcast add that can be used for the inner loop of a more general
+// broadcast add, so that, for example, scalar broadcast with batch will still
+// be fast.
+inline void AddScalarBroadcast(int size, const ArithmeticParams& params,
+                               uint8_t input1_data, const uint8_t* input2_data,
+                               uint8_t* output_data) {
+  TFLITE_DCHECK_GT(params.input1_offset, -256);
+  TFLITE_DCHECK_GT(params.input2_offset, -256);
+  TFLITE_DCHECK_LT(params.input1_offset, 256);
+  TFLITE_DCHECK_LT(params.input2_offset, 256);
+
+  const int32_t input1_val = params.input1_offset + input1_data;
+  const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
+  const int32_t scaled_input1_val =
+      MultiplyByQuantizedMultiplierSmallerThanOneExp(
+          shifted_input1_val, params.input1_multiplier, params.input1_shift);
+  for (int i = 0; i < size; ++i) {
+    const int32_t input2_val = params.input2_offset + input2_data[i];
+    const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
+    const int32_t scaled_input2_val =
+        MultiplyByQuantizedMultiplierSmallerThanOneExp(
+            shifted_input2_val, params.input2_multiplier, params.input2_shift);
+    const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
+    const int32_t raw_output =
+        MultiplyByQuantizedMultiplierSmallerThanOneExp(
+            raw_sum, params.output_multiplier, params.output_shift) +
+        params.output_offset;
+    const int32_t clamped_output =
+        std::min(params.quantized_activation_max,
+                 std::max(params.quantized_activation_min, raw_output));
+    output_data[i] = static_cast<uint8_t>(clamped_output);
+  }
+}
+
+inline void Add(const ArithmeticParams& params,
+                const RuntimeShape& input1_shape, const uint8_t* input1_data,
+                const RuntimeShape& input2_shape, const uint8_t* input2_data,
+                const RuntimeShape& output_shape, uint8_t* output_data) {
+  TFLITE_DCHECK_LE(params.quantized_activation_min,
+                   params.quantized_activation_max);
+  const int flat_size =
+      MatchingElementsSize(input1_shape, input2_shape, output_shape);
+
+  TFLITE_DCHECK_GT(params.input1_offset, -256);
+  TFLITE_DCHECK_GT(params.input2_offset, -256);
+  TFLITE_DCHECK_LT(params.input1_offset, 256);
+  TFLITE_DCHECK_LT(params.input2_offset, 256);
+  AddElementwise(flat_size, params, input1_data, input2_data, output_data);
+}
+
+inline void AddGeneralParamScale(const ArithmeticParams& params,
+                                 const RuntimeShape& input1_shape,
+                                 const int16_t* input1_data,
+                                 const RuntimeShape& input2_shape,
+                                 const int16_t* input2_data,
+                                 const RuntimeShape& output_shape,
+                                 int16_t* output_data) {
+  TFLITE_DCHECK_LE(params.quantized_activation_min,
+                   params.quantized_activation_max);
+  const int flat_size =
+      MatchingElementsSize(input1_shape, input2_shape, output_shape);
+
+  int max_value = std::numeric_limits<int16_t>::max();
+
+  TFLITE_DCHECK_GT(params.input1_offset, -max_value);
+  TFLITE_DCHECK_GT(params.input2_offset, -max_value);
+  TFLITE_DCHECK_LT(params.input1_offset, max_value);
+  TFLITE_DCHECK_LT(params.input2_offset, max_value);
+  AddElementwise(flat_size, params, input1_data, input2_data, output_data);
+}
+
+inline void Add(const ArithmeticParams& params,
+                const RuntimeShape& input1_shape, const int16_t* input1_data,
+                const RuntimeShape& input2_shape, const int16_t* input2_data,
+                const RuntimeShape& output_shape, int16_t* output_data,
+                bool pot_scale = true) {
+  if (!pot_scale) {
+    AddGeneralParamScale(params, input1_shape, input1_data, input2_shape,
+                         input2_data, output_shape, output_data);
+    return;
+  }
+
+  TFLITE_DCHECK_LE(params.quantized_activation_min,
+                   params.quantized_activation_max);
+
+  const int input1_shift = params.input1_shift;
+  const int flat_size =
+      MatchingElementsSize(input1_shape, input2_shape, output_shape);
+  const int16_t output_activation_min = params.quantized_activation_min;
+  const int16_t output_activation_max = params.quantized_activation_max;
+
+  TFLITE_DCHECK(input1_shift == 0 || params.input2_shift == 0);
+  TFLITE_DCHECK_LE(input1_shift, 0);
+  TFLITE_DCHECK_LE(params.input2_shift, 0);
+  const int16_t* not_shift_input =
+      input1_shift == 0 ? input1_data : input2_data;
+  const int16_t* shift_input = input1_shift == 0 ? input2_data : input1_data;
+  const int input_right_shift =
+      input1_shift == 0 ? -params.input2_shift : -input1_shift;
+
+  for (int i = 0; i < flat_size; i++) {
+    // F0 uses 0 integer bits, range [-1, 1].
+    using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+
+    F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]);
+    F0 scaled_input = F0::FromRaw(
+        gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift));
+    F0 result = gemmlowp::SaturatingAdd(scaled_input, input_ready_scaled);
+    const int16_t raw_output = result.raw();
+    const int16_t clamped_output = std::min(
+        output_activation_max, std::max(output_activation_min, raw_output));
+    output_data[i] = clamped_output;
+  }
+}
+
+// TODO(jiawen): We can implement BroadcastAdd on buffers of arbitrary
+// dimensionality if the runtime code does a single loop over one dimension
+// that handles broadcasting as the base case. The code generator would then
+// generate max(D1, D2) nested for loops.
+// TODO(benoitjacob): BroadcastAdd is intentionally duplicated from
+// reference_ops.h. Once an optimized version is implemented and NdArrayDesc<T>
+// is no longer referenced in this file, move NdArrayDesc<T> from types.h to
+// reference_ops.h.
+inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
+                               const RuntimeShape& input1_shape,
+                               const float* input1_data,
+                               const RuntimeShape& input2_shape,
+                               const float* input2_data,
+                               const RuntimeShape& output_shape,
+                               float* output_data) {
+  NdArrayDesc<4> desc1;
+  NdArrayDesc<4> desc2;
+  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
+                                      &desc2);
+  const RuntimeShape extended_output_shape =
+      RuntimeShape::ExtendedShape(4, output_shape);
+
+  // In TensorFlow, the dimensions are canonically named (batch_number, row,
+  // col, channel), with extents (batches, height, width, depth), with the
+  // trailing dimension changing most rapidly (channels has the smallest stride,
+  // typically 1 element).
+  //
+  // In generated C code, we store arrays with the dimensions reversed. The
+  // first dimension has the smallest stride.
+  //
+  // We name our variables by their TensorFlow convention, but generate C code
+  // nesting loops such that the innermost loop has the smallest stride for the
+  // best cache behavior.
+  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
+    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
+      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
+        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
+          output_data[Offset(extended_output_shape, b, y, x, c)] =
+              ActivationFunctionWithMinMax(
+                  input1_data[SubscriptToIndex(desc1, b, y, x, c)] +
+                      input2_data[SubscriptToIndex(desc2, b, y, x, c)],
+                  params.float_activation_min, params.float_activation_max);
+        }
+      }
+    }
+  }
+}
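For example, a per-channel vector can be broadcast across NHWC activations like so (a hypothetical call; shapes, values, and the activation bounds are all made up, and the TFLite Micro headers are assumed to be on the include path):

    #include <cstdio>
    #include <limits>

    #include "tensorflow/lite/kernels/internal/reference/add.h"

    int main() {
      tflite::ArithmeticParams params = {};
      params.float_activation_min = std::numeric_limits<float>::lowest();
      params.float_activation_max = std::numeric_limits<float>::max();
      const tflite::RuntimeShape act_shape({1, 2, 2, 3});   // NHWC activations
      const tflite::RuntimeShape bias_shape({1, 1, 1, 3});  // per-channel vector
      const float act[12] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3};
      const float bias[3] = {10, 20, 30};
      float out[12];
      tflite::reference_ops::BroadcastAdd4DSlow(params, act_shape, act,
                                                bias_shape, bias, act_shape,
                                                out);
      printf("%g %g %g\n", out[0], out[1], out[2]);  // 10 20 30
      return 0;
    }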
+
+inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
+                               const RuntimeShape& input1_shape,
+                               const int32_t* input1_data,
+                               const RuntimeShape& input2_shape,
+                               const int32_t* input2_data,
+                               const RuntimeShape& output_shape,
+                               int32_t* output_data) {
+  NdArrayDesc<4> desc1;
+  NdArrayDesc<4> desc2;
+  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
+                                      &desc2);
+  const RuntimeShape extended_output_shape =
+      RuntimeShape::ExtendedShape(4, output_shape);
+
+  // In TensorFlow, the dimensions are canonically named (batch_number, row,
+  // col, channel), with extents (batches, height, width, depth), with the
+  // trailing dimension changing most rapidly (channels has the smallest stride,
+  // typically 1 element).
+  //
+  // In generated C code, we store arrays with the dimensions reversed. The
+  // first dimension has the smallest stride.
+  //
+  // We name our variables by their TensorFlow convention, but generate C code
+  // nesting loops such that the innermost loop has the smallest stride for the
+  // best cache behavior.
+  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
+    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
+      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
+        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
+          output_data[Offset(extended_output_shape, b, y, x, c)] =
+              ActivationFunctionWithMinMax(
+                  input1_data[SubscriptToIndex(desc1, b, y, x, c)] +
+                      input2_data[SubscriptToIndex(desc2, b, y, x, c)],
+                  params.quantized_activation_min,
+                  params.quantized_activation_max);
+        }
+      }
+    }
+  }
+}
+
+// This function is used for 8-bit as well as for 16-bit, but the accumulator
+// is 32-bit in both cases. Overflow cannot occur thanks to the choice of the
+// shift (20 or 15, respectively - see add.cc for more comments).
+template <typename T>
+inline void BroadcastAdd4DSlow(
+    const ArithmeticParams& params, const RuntimeShape& input1_shape,
+    const T* input1_data, const RuntimeShape& input2_shape,
+    const T* input2_data, const RuntimeShape& output_shape, T* output_data) {
+  NdArrayDesc<4> desc1;
+  NdArrayDesc<4> desc2;
+  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
+                                      &desc2);
+  const RuntimeShape extended_output_shape =
+      RuntimeShape::ExtendedShape(4, output_shape);
+
+  // In TensorFlow, the dimensions are canonically named (batch_number, row,
+  // col, channel), with extents (batches, height, width, depth), with the
+  // trailing dimension changing most rapidly (channels has the smallest stride,
+  // typically 1 element).
+  //
+  // In generated C code, we store arrays with the dimensions reversed. The
+  // first dimension has the smallest stride.
+  //
+  // We name our variables by their TensorFlow convention, but generate C code
+  // nesting loops such that the innermost loop has the smallest stride for the
+  // best cache behavior.
+  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
+    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
+      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
+        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
+          const int32_t input1_val =
+              params.input1_offset +
+              input1_data[SubscriptToIndex(desc1, b, y, x, c)];
+          const int32_t input2_val =
+              params.input2_offset +
+              input2_data[SubscriptToIndex(desc2, b, y, x, c)];
+          const int32_t shifted_input1_val =
+              input1_val * (1 << params.left_shift);
+          const int32_t shifted_input2_val =
+              input2_val * (1 << params.left_shift);
+          const int32_t scaled_input1_val =
+              MultiplyByQuantizedMultiplierSmallerThanOneExp(
+                  shifted_input1_val, params.input1_multiplier,
+                  params.input1_shift);
+          const int32_t scaled_input2_val =
+              MultiplyByQuantizedMultiplierSmallerThanOneExp(
+                  shifted_input2_val, params.input2_multiplier,
+                  params.input2_shift);
+          const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
+          const int32_t raw_output =
+              MultiplyByQuantizedMultiplierSmallerThanOneExp(
+                  raw_sum, params.output_multiplier, params.output_shift) +
+              params.output_offset;
+          const int32_t clamped_output =
+              std::min(params.quantized_activation_max,
+                       std::max(params.quantized_activation_min, raw_output));
+          output_data[Offset(extended_output_shape, b, y, x, c)] =
+              static_cast<T>(clamped_output);
+        }
+      }
+    }
+  }
+}
+
+inline void BroadcastAddFivefold(const ArithmeticParams& unswitched_params,
+                                 const RuntimeShape& unswitched_input1_shape,
+                                 const uint8_t* unswitched_input1_data,
+                                 const RuntimeShape& unswitched_input2_shape,
+                                 const uint8_t* unswitched_input2_data,
+                                 const RuntimeShape& output_shape,
+                                 uint8_t* output_data) {
+  ArithmeticParams switched_params = unswitched_params;
+  switched_params.input1_offset = unswitched_params.input2_offset;
+  switched_params.input1_multiplier = unswitched_params.input2_multiplier;
+  switched_params.input1_shift = unswitched_params.input2_shift;
+  switched_params.input2_offset = unswitched_params.input1_offset;
+  switched_params.input2_multiplier = unswitched_params.input1_multiplier;
+  switched_params.input2_shift = unswitched_params.input1_shift;
+
+  const bool use_unswitched =
+      unswitched_params.broadcast_category ==
+      tflite::BroadcastableOpCategory::kFirstInputBroadcastsFast;
+
+  const ArithmeticParams& params =
+      use_unswitched ? unswitched_params : switched_params;
+  const uint8_t* input1_data =
+      use_unswitched ? unswitched_input1_data : unswitched_input2_data;
+  const uint8_t* input2_data =
+      use_unswitched ? unswitched_input2_data : unswitched_input1_data;
+
+  // Fivefold nested loops. The second input resets its position for each
+  // iteration of the second loop. The first input resets its position at the
+  // beginning of the fourth loop. The innermost loop is an elementwise add of
+  // sections of the arrays.
+  uint8_t* output_data_ptr = output_data;
+  const uint8_t* input1_data_ptr = input1_data;
+  const uint8_t* input2_data_reset = input2_data;
+  // In the fivefold pattern, y0, y2 and y4 are not broadcast, and so shared
+  // between input shapes. y3 for input 1 is always broadcast, and so the
+  // dimension there is 1, whereas optionally y1 might be broadcast for input 2.
+  // Put another way,
+  // input1.shape.FlatSize = y0 * y1 * y2 * y4,
+  // input2.shape.FlatSize = y0 * y2 * y3 * y4.
+  int y0 = params.broadcast_shape[0];
+  int y1 = params.broadcast_shape[1];
+  int y2 = params.broadcast_shape[2];
+  int y3 = params.broadcast_shape[3];
+  int y4 = params.broadcast_shape[4];
+  if (y4 > 1) {
+    // General fivefold pattern, with y4 > 1 so there is a non-broadcast inner
+    // dimension.
+    for (int i0 = 0; i0 < y0; ++i0) {
+      const uint8_t* input2_data_ptr;
+      for (int i1 = 0; i1 < y1; ++i1) {
+        input2_data_ptr = input2_data_reset;
+        for (int i2 = 0; i2 < y2; ++i2) {
+          for (int i3 = 0; i3 < y3; ++i3) {
+            AddElementwise(y4, params, input1_data_ptr, input2_data_ptr,
+                           output_data_ptr);
+            input2_data_ptr += y4;
+            output_data_ptr += y4;
+          }
+          // We have broadcast y4 of input1 data y3 times, and now move on.
+          input1_data_ptr += y4;
+        }
+      }
+      // We have broadcast y2*y3*y4 of input2 data y1 times, and now move on.
+      input2_data_reset = input2_data_ptr;
+    }
+  } else {
+    // Special case of y4 == 1, in which the innermost loop is a single element
+    // and can be combined with the next (y3) as an inner broadcast.
+    //
+    // Note that this handles the case of pure scalar broadcast when
+    // y0 == y1 == y2 == 1. With low overhead it handles cases such as scalar
+    // broadcast with batch (as y2 > 1).
+    //
+    // NOTE The process is the same as the above general case except simplified
+    // for y4 == 1 and the loop over y3 is contained within the
+    // AddScalarBroadcast function.
+    for (int i0 = 0; i0 < y0; ++i0) {
+      const uint8_t* input2_data_ptr;
+      for (int i1 = 0; i1 < y1; ++i1) {
+        input2_data_ptr = input2_data_reset;
+        for (int i2 = 0; i2 < y2; ++i2) {
+          AddScalarBroadcast(y3, params, *input1_data_ptr, input2_data_ptr,
+                             output_data_ptr);
+          input2_data_ptr += y3;
+          output_data_ptr += y3;
+          input1_data_ptr += 1;
+        }
+      }
+      input2_data_reset = input2_data_ptr;
+    }
+  }
+}
+
+}  // namespace reference_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_
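The fivefold pointer walk in BroadcastAddFivefold can be sanity-checked with a toy standalone version. As the comments above state, input1's flat size is y0*y1*y2*y4 and input2's is y0*y2*y3*y4. This sketch is hypothetical: sizes are made up and plain integer adds replace AddElementwise.

    #include <cstdio>

    int main() {
      // input1 flat size = y0*y1*y2*y4 = 24, input2 = y0*y2*y3*y4 = 12,
      // output = y0*y1*y2*y3*y4 = 24.
      const int y0 = 1, y1 = 2, y2 = 3, y3 = 1, y4 = 4;
      int in1[24], in2[12], out[24];
      for (int i = 0; i < 24; ++i) in1[i] = i;
      for (int i = 0; i < 12; ++i) in2[i] = 100 * i;
      int* op = out;
      const int* p1 = in1;
      const int* reset2 = in2;
      for (int i0 = 0; i0 < y0; ++i0) {
        const int* p2 = reset2;
        for (int i1 = 0; i1 < y1; ++i1) {
          p2 = reset2;
          for (int i2 = 0; i2 < y2; ++i2) {
            for (int i3 = 0; i3 < y3; ++i3) {
              for (int i4 = 0; i4 < y4; ++i4) op[i4] = p1[i4] + p2[i4];
              p2 += y4;
              op += y4;
            }
            p1 += y4;  // input1's y3 extent is 1: advance after the y3 loop
          }
        }
        reset2 = p2;  // input2 repeats y1 times before moving on
      }
      printf("%d %d\n", out[4], out[12]);  // 404 12
      return 0;
    }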

+ 68 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/arg_min_max.h

@@ -0,0 +1,68 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ARG_MIN_MAX_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ARG_MIN_MAX_H_
+
+#include "tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+
+namespace reference_ops {
+
+template <typename T1, typename T2, typename T3, typename Cmp>
+void ArgMinMax(const RuntimeShape& input1_shape, const T1* input1_data,
+               const T3* input2_data, const RuntimeShape& output_shape,
+               T2* output_data, const Cmp& cmp) {
+  TFLITE_DCHECK_GT(input1_shape.DimensionsCount(), 0);
+  TFLITE_DCHECK_EQ(input1_shape.DimensionsCount() - 1,
+                   output_shape.DimensionsCount());
+  int axis = input2_data[0];
+  if (axis < 0) {
+    axis += input1_shape.DimensionsCount();
+  }
+  const int axis_size = input1_shape.Dims(axis);
+
+  int outer_size = 1;
+  for (int i = 0; i < axis; ++i) {
+    TFLITE_DCHECK_EQ(input1_shape.Dims(i), output_shape.Dims(i));
+    outer_size *= input1_shape.Dims(i);
+  }
+
+  int inner_size = 1;
+  const int dims_count = input1_shape.DimensionsCount();
+  for (int i = axis + 1; i < dims_count; ++i) {
+    TFLITE_DCHECK_EQ(input1_shape.Dims(i), output_shape.Dims(i - 1));
+    inner_size *= input1_shape.Dims(i);
+  }
+  for (int outer = 0; outer < outer_size; ++outer) {
+    for (int inner = 0; inner < inner_size; ++inner) {
+      auto min_max_value = input1_data[outer * axis_size * inner_size + inner];
+      T2 min_max_index = 0;
+      for (int i = 1; i < axis_size; ++i) {
+        const auto& curr_value =
+            input1_data[(outer * axis_size + i) * inner_size + inner];
+        if (cmp(curr_value, min_max_value)) {
+          min_max_value = curr_value;
+          min_max_index = static_cast<T2>(i);
+        }
+      }
+      output_data[outer * inner_size + inner] = min_max_index;
+    }
+  }
+}
+}  // namespace reference_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ARG_MIN_MAX_H_
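A hypothetical ArgMax call over the last axis of a 2x3 tensor (std::greater as the comparator gives arg-max; std::less would give arg-min), assuming the in-tree include path:

    #include <cstdint>
    #include <cstdio>
    #include <functional>

    #include "tensorflow/lite/kernels/internal/reference/arg_min_max.h"

    int main() {
      const tflite::RuntimeShape input_shape({2, 3});
      const float input[6] = {1, 9, 3, 7, 2, 5};
      const int32_t axis[1] = {1};  // reduce over the last dimension
      const tflite::RuntimeShape output_shape({2});
      int32_t output[2];
      tflite::reference_ops::ArgMinMax(input_shape, input, axis, output_shape,
                                       output, std::greater<float>());
      printf("%d %d\n", output[0], output[1]);  // 1 0
      return 0;
    }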

+ 84 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/binary_function.h

@@ -0,0 +1,84 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BINARY_FUNCTION_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BINARY_FUNCTION_H_
+
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/compatibility.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+
+namespace reference_ops {
+
+// TODO(ycling): Refactoring. Remove BroadcastLogical and use the more
+// generalized and efficient BroadcastBinaryFunction.
+//
+// Also appears to duplicate MinimumMaximum.
+//
+// R: Result type. T1: Input 1 type. T2: Input 2 type.
+template <typename R, typename T1, typename T2>
+inline void BroadcastBinaryFunction4DSlow(
+    const RuntimeShape& unextended_input1_shape, const T1* input1_data,
+    const RuntimeShape& unextended_input2_shape, const T2* input2_data,
+    const RuntimeShape& unextended_output_shape, R* output_data,
+    R (*func)(T1, T2)) {
+  TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
+  const RuntimeShape output_shape =
+      RuntimeShape::ExtendedShape(4, unextended_output_shape);
+
+  NdArrayDesc<4> desc1;
+  NdArrayDesc<4> desc2;
+  NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
+                                      unextended_input2_shape, &desc1, &desc2);
+
+  for (int b = 0; b < output_shape.Dims(0); ++b) {
+    for (int y = 0; y < output_shape.Dims(1); ++y) {
+      for (int x = 0; x < output_shape.Dims(2); ++x) {
+        for (int c = 0; c < output_shape.Dims(3); ++c) {
+          auto out_idx = Offset(output_shape, b, y, x, c);
+          auto in1_idx = SubscriptToIndex(desc1, b, y, x, c);
+          auto in2_idx = SubscriptToIndex(desc2, b, y, x, c);
+          auto in1_val = input1_data[in1_idx];
+          auto in2_val = input2_data[in2_idx];
+          output_data[out_idx] = func(in1_val, in2_val);
+        }
+      }
+    }
+  }
+}
+
+// R: Result type. T1: Input 1 type. T2: Input 2 type.
+// TODO(renjieliu): Refactor other binary functions to use this one.
+template <typename R, typename T1, typename T2>
+inline void BinaryFunction(const RuntimeShape& input1_shape,
+                           const T1* input1_data,
+                           const RuntimeShape& input2_shape,
+                           const T2* input2_data,
+                           const RuntimeShape& output_shape, R* output_data,
+                           R (*func)(T1, T2)) {
+  const int flat_size =
+      MatchingFlatSize(input1_shape, input2_shape, output_shape);
+  for (int i = 0; i < flat_size; ++i) {
+    output_data[i] = func(input1_data[i], input2_data[i]);
+  }
+}
+
+}  // namespace reference_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BINARY_FUNCTION_H_
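BinaryFunction lifts any scalar function over matching tensors. A hypothetical squared-difference sketch (the scalar op and all values are made up):

    #include <cstdio>

    #include "tensorflow/lite/kernels/internal/reference/binary_function.h"

    // Hypothetical elementwise op.
    float SquaredDifference(float a, float b) { return (a - b) * (a - b); }

    int main() {
      const tflite::RuntimeShape shape({1, 1, 1, 4});
      const float a[4] = {1, 2, 3, 4};
      const float b[4] = {4, 3, 2, 1};
      float out[4];
      tflite::reference_ops::BinaryFunction(shape, a, shape, b, shape, out,
                                            SquaredDifference);
      for (float v : out) printf("%g ", v);  // 9 1 1 9
      return 0;
    }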

+ 37 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/ceil.h

@@ -0,0 +1,37 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CEIL_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CEIL_H_
+
+#include <cmath>
+
+#include "tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+
+namespace reference_ops {
+
+inline void Ceil(const RuntimeShape& input_shape, const float* input_data,
+                 const RuntimeShape& output_shape, float* output_data) {
+  const int flat_size = MatchingFlatSize(input_shape, output_shape);
+
+  for (int i = 0; i < flat_size; ++i) {
+    output_data[i] = std::ceil(input_data[i]);
+  }
+}
+
+}  // namespace reference_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CEIL_H_

+ 334 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/comparisons.h

@@ -0,0 +1,334 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_COMPARISONS_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_COMPARISONS_H_
+
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+#include "tensorflow/lite/string_util.h"
+
+namespace tflite {
+
+namespace reference_ops {
+
+template <typename T>
+inline bool EqualFn(T lhs, T rhs) {
+  return lhs == rhs;
+}
+
+template <typename T>
+inline bool NotEqualFn(T lhs, T rhs) {
+  return lhs != rhs;
+}
+
+template <typename T>
+inline bool GreaterFn(T lhs, T rhs) {
+  return lhs > rhs;
+}
+template <typename T>
+inline bool GreaterEqualFn(T lhs, T rhs) {
+  return lhs >= rhs;
+}
+template <typename T>
+inline bool LessFn(T lhs, T rhs) {
+  return lhs < rhs;
+}
+template <typename T>
+inline bool LessEqualFn(T lhs, T rhs) {
+  return lhs <= rhs;
+}
+
+inline bool StringRefEqualFn(const StringRef& lhs, const StringRef& rhs) {
+  if (lhs.len != rhs.len) return false;
+  for (int i = 0; i < lhs.len; ++i) {
+    if (lhs.str[i] != rhs.str[i]) return false;
+  }
+  return true;
+}
+
+inline bool StringRefNotEqualFn(const StringRef& lhs, const StringRef& rhs) {
+  return !StringRefEqualFn(lhs, rhs);
+}
+
+template <typename T>
+using ComparisonFn = bool (*)(T, T);
+
+template <typename T, ComparisonFn<T> F>
+inline void ComparisonImpl(
+    const ComparisonParams& op_params, const RuntimeShape& input1_shape,
+    const T* input1_data, const RuntimeShape& input2_shape,
+    const T* input2_data, const RuntimeShape& output_shape, bool* output_data) {
+  const int64_t flatsize =
+      MatchingFlatSize(input1_shape, input2_shape, output_shape);
+  for (int64_t i = 0; i < flatsize; ++i) {
+    output_data[i] = F(input1_data[i], input2_data[i]);
+  }
+}
+
+inline void ComparisonStringImpl(bool (*F)(const StringRef&, const StringRef&),
+                                 const RuntimeShape& input1_shape,
+                                 const TfLiteTensor* input1,
+                                 const RuntimeShape& input2_shape,
+                                 const TfLiteTensor* input2,
+                                 const RuntimeShape& output_shape,
+                                 bool* output_data) {
+  const int64_t flatsize =
+      MatchingFlatSize(input1_shape, input2_shape, output_shape);
+  for (int64_t i = 0; i < flatsize; ++i) {
+    const auto lhs = GetString(input1, i);
+    const auto rhs = GetString(input2, i);
+    output_data[i] = F(lhs, rhs);
+  }
+}
+
+template <ComparisonFn<float> F>
+inline void Comparison(const ComparisonParams& op_params,
+                       const RuntimeShape& input1_shape,
+                       const float* input1_data,
+                       const RuntimeShape& input2_shape,
+                       const float* input2_data,
+                       const RuntimeShape& output_shape, bool* output_data) {
+  ComparisonImpl<float, F>(op_params, input1_shape, input1_data, input2_shape,
+                           input2_data, output_shape, output_data);
+}
+
+template <typename T, ComparisonFn<int32_t> F>
+inline void ComparisonWithScaling(
+    const ComparisonParams& op_params, const RuntimeShape& input1_shape,
+    const T* input1_data, const RuntimeShape& input2_shape,
+    const T* input2_data, const RuntimeShape& output_shape, bool* output_data) {
+  int left_shift = op_params.left_shift;
+  int32_t input1_offset = op_params.input1_offset;
+  int32_t input1_multiplier = op_params.input1_multiplier;
+  int input1_shift = op_params.input1_shift;
+  int32_t input2_offset = op_params.input2_offset;
+  int32_t input2_multiplier = op_params.input2_multiplier;
+  int input2_shift = op_params.input2_shift;
+
+  const int64_t flatsize =
+      MatchingFlatSize(input1_shape, input2_shape, output_shape);
+  for (int64_t i = 0; i < flatsize; ++i) {
+    const int32_t input1_val = input1_offset + input1_data[i];
+    const int32_t input2_val = input2_offset + input2_data[i];
+    const int32_t shifted_input1_val = input1_val * (1 << left_shift);
+    const int32_t shifted_input2_val = input2_val * (1 << left_shift);
+    const int32_t scaled_input1_val =
+        MultiplyByQuantizedMultiplierSmallerThanOneExp(
+            shifted_input1_val, input1_multiplier, input1_shift);
+    const int32_t scaled_input2_val =
+        MultiplyByQuantizedMultiplierSmallerThanOneExp(
+            shifted_input2_val, input2_multiplier, input2_shift);
+    output_data[i] = F(scaled_input1_val, scaled_input2_val);
+  }
+}
+
+struct BroadcastComparison4DSlowCommon {
+  const RuntimeShape output_shape;
+  NdArrayDesc<4> desc1;
+  NdArrayDesc<4> desc2;
+};
+
+inline BroadcastComparison4DSlowCommon BroadcastComparison4DSlowPreprocess(
+    const RuntimeShape& unextended_input1_shape,
+    const RuntimeShape& unextended_input2_shape,
+    const RuntimeShape& unextended_output_shape) {
+  TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
+  NdArrayDesc<4> desc1;
+  NdArrayDesc<4> desc2;
+  NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
+                                      unextended_input2_shape, &desc1, &desc2);
+  return {RuntimeShape::ExtendedShape(4, unextended_output_shape), desc1,
+          desc2};
+}
+
+template <typename T, ComparisonFn<T> F>
+inline void BroadcastComparison4DSlowImpl(
+    const ComparisonParams& op_params,
+    const RuntimeShape& unextended_input1_shape, const T* input1_data,
+    const RuntimeShape& unextended_input2_shape, const T* input2_data,
+    const RuntimeShape& unextended_output_shape, bool* output_data) {
+  const BroadcastComparison4DSlowCommon dims =
+      BroadcastComparison4DSlowPreprocess(unextended_input1_shape,
+                                          unextended_input2_shape,
+                                          unextended_output_shape);
+
+  for (int b = 0; b < dims.output_shape.Dims(0); ++b) {
+    for (int y = 0; y < dims.output_shape.Dims(1); ++y) {
+      for (int x = 0; x < dims.output_shape.Dims(2); ++x) {
+        for (int c = 0; c < dims.output_shape.Dims(3); ++c) {
+          output_data[Offset(dims.output_shape, b, y, x, c)] =
+              F(input1_data[SubscriptToIndex(dims.desc1, b, y, x, c)],
+                input2_data[SubscriptToIndex(dims.desc2, b, y, x, c)]);
+        }
+      }
+    }
+  }
+}
+
+inline void BroadcastComparison4DSlowStringImpl(
+    bool (*F)(const StringRef&, const StringRef&),
+    const RuntimeShape& unextended_input1_shape, const TfLiteTensor* input1,
+    const RuntimeShape& unextended_input2_shape, const TfLiteTensor* input2,
+    const RuntimeShape& unextended_output_shape, bool* output_data) {
+  const BroadcastComparison4DSlowCommon dims =
+      BroadcastComparison4DSlowPreprocess(unextended_input1_shape,
+                                          unextended_input2_shape,
+                                          unextended_output_shape);
+
+  for (int b = 0; b < dims.output_shape.Dims(0); ++b) {
+    for (int y = 0; y < dims.output_shape.Dims(1); ++y) {
+      for (int x = 0; x < dims.output_shape.Dims(2); ++x) {
+        for (int c = 0; c < dims.output_shape.Dims(3); ++c) {
+          const auto lhs =
+              GetString(input1, SubscriptToIndex(dims.desc1, b, y, x, c));
+          const auto rhs =
+              GetString(input2, SubscriptToIndex(dims.desc2, b, y, x, c));
+          output_data[Offset(dims.output_shape, b, y, x, c)] = F(lhs, rhs);
+        }
+      }
+    }
+  }
+}
+
+template <ComparisonFn<float> F>
+inline void BroadcastComparison4DSlow(const ComparisonParams& op_params,
+                                      const RuntimeShape& input1_shape,
+                                      const float* input1_data,
+                                      const RuntimeShape& input2_shape,
+                                      const float* input2_data,
+                                      const RuntimeShape& output_shape,
+                                      bool* output_data) {
+  BroadcastComparison4DSlowImpl<float, F>(op_params, input1_shape, input1_data,
+                                          input2_shape, input2_data,
+                                          output_shape, output_data);
+}
+
+template <typename T, ComparisonFn<int32_t> F>
+inline void BroadcastComparison4DSlowWithScaling(
+    const ComparisonParams& op_params,
+    const RuntimeShape& unextended_input1_shape, const T* input1_data,
+    const RuntimeShape& unextended_input2_shape, const T* input2_data,
+    const RuntimeShape& unextended_output_shape, bool* output_data) {
+  const BroadcastComparison4DSlowCommon dims =
+      BroadcastComparison4DSlowPreprocess(unextended_input1_shape,
+                                          unextended_input2_shape,
+                                          unextended_output_shape);
+
+  int left_shift = op_params.left_shift;
+  int32_t input1_offset = op_params.input1_offset;
+  int32_t input1_multiplier = op_params.input1_multiplier;
+  int input1_shift = op_params.input1_shift;
+  int32_t input2_offset = op_params.input2_offset;
+  int32_t input2_multiplier = op_params.input2_multiplier;
+  int input2_shift = op_params.input2_shift;
+
+  for (int b = 0; b < dims.output_shape.Dims(0); ++b) {
+    for (int y = 0; y < dims.output_shape.Dims(1); ++y) {
+      for (int x = 0; x < dims.output_shape.Dims(2); ++x) {
+        for (int c = 0; c < dims.output_shape.Dims(3); ++c) {
+          const int32_t input1_val =
+              input1_offset +
+              input1_data[SubscriptToIndex(dims.desc1, b, y, x, c)];
+          const int32_t input2_val =
+              input2_offset +
+              input2_data[SubscriptToIndex(dims.desc2, b, y, x, c)];
+          const int32_t shifted_input1_val = input1_val * (1 << left_shift);
+          const int32_t shifted_input2_val = input2_val * (1 << left_shift);
+          const int32_t scaled_input1_val =
+              MultiplyByQuantizedMultiplierSmallerThanOneExp(
+                  shifted_input1_val, input1_multiplier, input1_shift);
+          const int32_t scaled_input2_val =
+              MultiplyByQuantizedMultiplierSmallerThanOneExp(
+                  shifted_input2_val, input2_multiplier, input2_shift);
+          output_data[Offset(dims.output_shape, b, y, x, c)] =
+              F(scaled_input1_val, scaled_input2_val);
+        }
+      }
+    }
+  }
+}
+
+#define TFLITE_COMPARISON_OP(name)                                             \
+  inline void name(const ComparisonParams& op_params,                          \
+                   const RuntimeShape& input1_shape, const float* input1_data, \
+                   const RuntimeShape& input2_shape, const float* input2_data, \
+                   const RuntimeShape& output_shape, bool* output_data) {      \
+    Comparison<name##Fn>(op_params, input1_shape, input1_data, input2_shape,   \
+                         input2_data, output_shape, output_data);              \
+  }                                                                            \
+  template <typename T>                                                        \
+  inline void name##NoScaling(                                                 \
+      const ComparisonParams& op_params, const RuntimeShape& input1_shape,     \
+      const T* input1_data, const RuntimeShape& input2_shape,                  \
+      const T* input2_data, const RuntimeShape& output_shape,                  \
+      bool* output_data) {                                                     \
+    ComparisonImpl<T, name##Fn>(op_params, input1_shape, input1_data,          \
+                                input2_shape, input2_data, output_shape,       \
+                                output_data);                                  \
+  }                                                                            \
+  template <typename T>                                                        \
+  inline void name##WithScaling(                                               \
+      const ComparisonParams& op_params, const RuntimeShape& input1_shape,     \
+      const T* input1_data, const RuntimeShape& input2_shape,                  \
+      const T* input2_data, const RuntimeShape& output_shape,                  \
+      bool* output_data) {                                                     \
+    ComparisonWithScaling<T, name##Fn>(op_params, input1_shape, input1_data,   \
+                                       input2_shape, input2_data,              \
+                                       output_shape, output_data);             \
+  }                                                                            \
+  template <typename T>                                                        \
+  inline void Broadcast4DSlow##name##NoScaling(                                \
+      const ComparisonParams& op_params, const RuntimeShape& input1_shape,     \
+      const T* input1_data, const RuntimeShape& input2_shape,                  \
+      const T* input2_data, const RuntimeShape& output_shape,                  \
+      bool* output_data) {                                                     \
+    BroadcastComparison4DSlowImpl<T, name##Fn>(                                \
+        op_params, input1_shape, input1_data, input2_shape, input2_data,       \
+        output_shape, output_data);                                            \
+  }                                                                            \
+  inline void Broadcast4DSlow##name(                                           \
+      const ComparisonParams& op_params, const RuntimeShape& input1_shape,     \
+      const float* input1_data, const RuntimeShape& input2_shape,              \
+      const float* input2_data, const RuntimeShape& output_shape,              \
+      bool* output_data) {                                                     \
+    BroadcastComparison4DSlow<name##Fn>(op_params, input1_shape, input1_data,  \
+                                        input2_shape, input2_data,             \
+                                        output_shape, output_data);            \
+  }                                                                            \
+  template <typename T>                                                        \
+  inline void Broadcast4DSlow##name##WithScaling(                              \
+      const ComparisonParams& op_params, const RuntimeShape& input1_shape,     \
+      const T* input1_data, const RuntimeShape& input2_shape,                  \
+      const T* input2_data, const RuntimeShape& output_shape,                  \
+      bool* output_data) {                                                     \
+    BroadcastComparison4DSlowWithScaling<T, name##Fn>(                         \
+        op_params, input1_shape, input1_data, input2_shape, input2_data,       \
+        output_shape, output_data);                                            \
+  }
+TFLITE_COMPARISON_OP(Equal);
+TFLITE_COMPARISON_OP(NotEqual);
+TFLITE_COMPARISON_OP(Greater);
+TFLITE_COMPARISON_OP(GreaterEqual);
+TFLITE_COMPARISON_OP(Less);
+TFLITE_COMPARISON_OP(LessEqual);
+#undef TFLITE_COMPARISON_OP
+
+}  // namespace reference_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_COMPARISONS_H_
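The macro expands to plain entry points such as Greater, GreaterWithScaling, and Broadcast4DSlowGreater. A hypothetical float call (values made up; the header's own includes are assumed to be available):

    #include <cstdio>

    #include "tensorflow/lite/kernels/internal/reference/comparisons.h"

    int main() {
      tflite::ComparisonParams params = {};  // unused by the float overloads
      const tflite::RuntimeShape shape({4});
      const float a[4] = {1, 5, 3, 2};
      const float b[4] = {4, 4, 4, 4};
      bool out[4];
      tflite::reference_ops::Greater(params, shape, a, shape, b, shape, out);
      for (bool v : out) printf("%d ", v);  // 0 1 0 0
      return 0;
    }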

+ 140 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/concatenation.h

@@ -0,0 +1,140 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONCATENATION_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONCATENATION_H_
+
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/compatibility.h"
+#include "tensorflow/lite/kernels/internal/cppmath.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+namespace reference_ops {
+
+template <typename Scalar>
+inline void Concatenation(const ConcatenationParams& params,
+                          const RuntimeShape* const* input_shapes,
+                          const Scalar* const* input_data,
+                          const RuntimeShape& output_shape,
+                          Scalar* output_data) {
+  int axis = params.axis;
+  int inputs_count = params.inputs_count;
+  const int concat_dimensions = output_shape.DimensionsCount();
+  TFLITE_DCHECK_LT(axis, concat_dimensions);
+
+  int64_t concat_size = 0;
+  for (int i = 0; i < inputs_count; i++) {
+    TFLITE_DCHECK_EQ(input_shapes[i]->DimensionsCount(), concat_dimensions);
+    for (int j = 0; j < concat_dimensions; j++) {
+      if (j != axis) {
+        MatchingDim(*input_shapes[i], j, output_shape, j);
+      }
+    }
+    concat_size += input_shapes[i]->Dims(axis);
+  }
+  TFLITE_DCHECK_EQ(concat_size, output_shape.Dims(axis));
+  int64_t outer_size = 1;
+  for (int i = 0; i < axis; ++i) {
+    outer_size *= output_shape.Dims(i);
+  }
+  // For all input arrays,
+  // FlatSize() = outer_size * Dims(axis) * base_inner_size;
+  int64_t base_inner_size = 1;
+  for (int i = axis + 1; i < concat_dimensions; ++i) {
+    base_inner_size *= output_shape.Dims(i);
+  }
+
+  Scalar* output_ptr = output_data;
+  for (int k = 0; k < outer_size; k++) {
+    for (int i = 0; i < inputs_count; ++i) {
+      const int copy_size = input_shapes[i]->Dims(axis) * base_inner_size;
+      const Scalar* input_ptr = input_data[i] + k * copy_size;
+      memcpy(output_ptr, input_ptr, copy_size * sizeof(Scalar));
+      output_ptr += copy_size;
+    }
+  }
+}
+
+// TODO(prabhumk): This is the same as the optimized implementation.
+// TODO(prabhumk): The quantized implementation of concatenation isn't fully
+// quantized as it takes scale as a floating point value. This should be fixed
+// when optimizing this routine further.
+inline void ConcatenationWithScaling(const ConcatenationParams& params,
+                                     const RuntimeShape* const* input_shapes,
+                                     const uint8_t* const* input_data,
+                                     const RuntimeShape& output_shape,
+                                     uint8_t* output_data) {
+  int axis = params.axis;
+  const int32_t* input_zeropoint = params.input_zeropoint;
+  const float* input_scale = params.input_scale;
+  int inputs_count = params.inputs_count;
+  const int32_t output_zeropoint = params.output_zeropoint;
+  const float output_scale = params.output_scale;
+
+  const int concat_dimensions = output_shape.DimensionsCount();
+  TFLITE_DCHECK_LT(axis, concat_dimensions);
+
+  int64_t concat_size = 0;
+  for (int i = 0; i < inputs_count; i++) {
+    TFLITE_DCHECK_EQ(input_shapes[i]->DimensionsCount(), concat_dimensions);
+    for (int j = 0; j < concat_dimensions; j++) {
+      if (j != axis) {
+        MatchingDim(*input_shapes[i], j, output_shape, j);
+      }
+    }
+    concat_size += input_shapes[i]->Dims(axis);
+  }
+  TFLITE_DCHECK_EQ(concat_size, output_shape.Dims(axis));
+  int64_t outer_size = 1;
+  for (int i = 0; i < axis; ++i) {
+    outer_size *= output_shape.Dims(i);
+  }
+  // For all input arrays,
+  // FlatSize() = outer_size * Dims(axis) * base_inner_size;
+  int64_t base_inner_size = 1;
+  for (int i = axis + 1; i < concat_dimensions; ++i) {
+    base_inner_size *= output_shape.Dims(i);
+  }
+
+  const float inverse_output_scale = 1.f / output_scale;
+  uint8_t* output_ptr = output_data;
+  for (int k = 0; k < outer_size; k++) {
+    for (int i = 0; i < inputs_count; ++i) {
+      const int copy_size = input_shapes[i]->Dims(axis) * base_inner_size;
+      const uint8_t* input_ptr = input_data[i] + k * copy_size;
+      if (input_zeropoint[i] == output_zeropoint &&
+          input_scale[i] == output_scale) {
+        memcpy(output_ptr, input_ptr, copy_size);
+      } else {
+        const float scale = input_scale[i] * inverse_output_scale;
+        const float bias = -input_zeropoint[i] * scale;
+        for (int j = 0; j < copy_size; ++j) {
+          const int32_t value = static_cast<int32_t>(tflite::TfLiteRound(
+                                    input_ptr[j] * scale + bias)) +
+                                output_zeropoint;
+          output_ptr[j] = static_cast<uint8_t>(
+              std::max<int32_t>(std::min<int32_t>(255, value), 0));
+        }
+      }
+      output_ptr += copy_size;
+    }
+  }
+}
+
+}  // namespace reference_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONCATENATION_H_
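A hypothetical concatenation of a 2x2 and a 2x1 tensor along axis 1 (all values made up):

    #include <cstdio>

    #include "tensorflow/lite/kernels/internal/reference/concatenation.h"

    int main() {
      tflite::ConcatenationParams params = {};
      params.axis = 1;
      params.inputs_count = 2;
      const tflite::RuntimeShape in1_shape({2, 2});
      const tflite::RuntimeShape in2_shape({2, 1});
      const float in1[4] = {1, 2, 3, 4};  // rows {1,2} and {3,4}
      const float in2[2] = {9, 8};        // rows {9} and {8}
      const tflite::RuntimeShape* shapes[2] = {&in1_shape, &in2_shape};
      const float* data[2] = {in1, in2};
      const tflite::RuntimeShape out_shape({2, 3});
      float out[6];
      tflite::reference_ops::Concatenation(params, shapes, data, out_shape, out);
      for (float v : out) printf("%g ", v);  // 1 2 9 3 4 8
      return 0;
    }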

+ 262 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/conv.h

@@ -0,0 +1,262 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_
+
+#include "tensorflow/lite/kernels/internal/types.h"
+#include "tensorflow/lite/kernels/internal/common.h"
+
+
+
+namespace tflite {
+
+namespace reference_ops {
+
+inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
+                 const float* input_data, const RuntimeShape& filter_shape,
+                 const float* filter_data, const RuntimeShape& bias_shape,
+                 const float* bias_data, const RuntimeShape& output_shape,
+                 float* output_data, const RuntimeShape& im2col_shape,
+                 float* im2col_data) {
+  const int stride_width = params.stride_width;
+  const int stride_height = params.stride_height;
+  const int dilation_width_factor = params.dilation_width_factor;
+  const int dilation_height_factor = params.dilation_height_factor;
+  const int pad_width = params.padding_values.width;
+  const int pad_height = params.padding_values.height;
+  const float output_activation_min = params.float_activation_min;
+  const float output_activation_max = params.float_activation_max;
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+
+  (void)im2col_data;   // only used in optimized code.
+  (void)im2col_shape;  // only used in optimized code.
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
+  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
+  if (bias_data) {
+    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
+  }
+  const int input_height = input_shape.Dims(1);
+  const int input_width = input_shape.Dims(2);
+  const int filter_height = filter_shape.Dims(1);
+  const int filter_width = filter_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_width = output_shape.Dims(2);
+  for (int batch = 0; batch < batches; ++batch) {
+    for (int out_y = 0; out_y < output_height; ++out_y) {
+      for (int out_x = 0; out_x < output_width; ++out_x) {
+        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
+          const int in_x_origin = (out_x * stride_width) - pad_width;
+          const int in_y_origin = (out_y * stride_height) - pad_height;
+          float total = 0.f;
+          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
+            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
+              for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
+                const int in_x = in_x_origin + dilation_width_factor * filter_x;
+                const int in_y =
+                    in_y_origin + dilation_height_factor * filter_y;
+                // If the location is outside the bounds of the input image,
+                // use zero as a default value.
+                if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
+                    (in_y < input_height)) {
+                  float input_value = input_data[Offset(
+                      input_shape, batch, in_y, in_x, in_channel)];
+                  float filter_value =
+                      filter_data[Offset(filter_shape, out_channel, filter_y,
+                                         filter_x, in_channel)];
+                  total += (input_value * filter_value);
+                }
+              }
+            }
+          }
+          float bias_value = 0.0f;
+          if (bias_data) {
+            bias_value = bias_data[out_channel];
+          }
+          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
+              ActivationFunctionWithMinMax(total + bias_value,
+                                           output_activation_min,
+                                           output_activation_max);
+        }
+      }
+    }
+  }
+}
+
+inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
+                 const uint8_t* input_data, const RuntimeShape& filter_shape,
+                 const uint8_t* filter_data, const RuntimeShape& bias_shape,
+                 const int32_t* bias_data, const RuntimeShape& output_shape,
+                 uint8_t* output_data, const RuntimeShape& im2col_shape,
+                 uint8_t* im2col_data, void* cpu_backend_context) {
+  (void)cpu_backend_context;  // only used in optimized code.
+  (void)im2col_data;   // only used in optimized code.
+  (void)im2col_shape;  // only used in optimized code.
+  const int stride_width = params.stride_width;
+  const int stride_height = params.stride_height;
+  const int dilation_width_factor = params.dilation_width_factor;
+  const int dilation_height_factor = params.dilation_height_factor;
+  const int pad_width = params.padding_values.width;
+  const int pad_height = params.padding_values.height;
+  const int32_t input_offset = params.input_offset;
+  const int32_t filter_offset = params.weights_offset;
+  const int32_t output_offset = params.output_offset;
+  const int32_t output_multiplier = params.output_multiplier;
+  const int output_shift = params.output_shift;
+  const int32_t output_activation_min = params.quantized_activation_min;
+  const int32_t output_activation_max = params.quantized_activation_max;
+  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
+  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
+  if (bias_data) {
+    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
+  }
+  const int input_height = input_shape.Dims(1);
+  const int input_width = input_shape.Dims(2);
+  const int filter_height = filter_shape.Dims(1);
+  const int filter_width = filter_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_width = output_shape.Dims(2);
+  for (int batch = 0; batch < batches; ++batch) {
+    for (int out_y = 0; out_y < output_height; ++out_y) {
+      for (int out_x = 0; out_x < output_width; ++out_x) {
+        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
+          const int in_x_origin = (out_x * stride_width) - pad_width;
+          const int in_y_origin = (out_y * stride_height) - pad_height;
+          int32_t acc = 0;
+          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
+            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
+              for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
+                const int in_x = in_x_origin + dilation_width_factor * filter_x;
+                const int in_y =
+                    in_y_origin + dilation_height_factor * filter_y;
+                // If the location is outside the bounds of the input image,
+                // use zero as a default value.
+                if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
+                    (in_y < input_height)) {
+                  int32_t input_val = input_data[Offset(
+                      input_shape, batch, in_y, in_x, in_channel)];
+                  int32_t filter_val =
+                      filter_data[Offset(filter_shape, out_channel, filter_y,
+                                         filter_x, in_channel)];
+                  acc +=
+                      (filter_val + filter_offset) * (input_val + input_offset);
+                }
+              }
+            }
+          }
+          if (bias_data) {
+            acc += bias_data[out_channel];
+          }
+          acc = MultiplyByQuantizedMultiplier(acc, output_multiplier,
+                                              output_shift);
+          acc += output_offset;
+          acc = std::max(acc, output_activation_min);
+          acc = std::min(acc, output_activation_max);
+          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
+              static_cast<uint8_t>(acc);
+        }
+      }
+    }
+  }
+}
+
+inline void HybridConvPerChannel(
+    const ConvParams& params, float* scaling_factors_ptr,
+    const RuntimeShape& input_shape, const int8_t* input_data,
+    const RuntimeShape& filter_shape, const int8_t* filter_data,
+    const RuntimeShape& bias_shape, const float* bias_data,
+    const RuntimeShape& output_shape, float* output_data,
+    const RuntimeShape& im2col_shape, int8_t* im2col_data,
+    const float* per_channel_scale, int32_t* input_offset) {
+  (void)im2col_data;   // only used in optimized code.
+  (void)im2col_shape;  // only used in optimized code.
+  const int stride_width = params.stride_width;
+  const int stride_height = params.stride_height;
+  const int dilation_width_factor = params.dilation_width_factor;
+  const int dilation_height_factor = params.dilation_height_factor;
+  const int pad_width = params.padding_values.width;
+  const int pad_height = params.padding_values.height;
+  const float output_activation_min = params.float_activation_min;
+  const float output_activation_max = params.float_activation_max;
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
+  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
+  if (bias_data) {
+    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
+  }
+  const int input_height = input_shape.Dims(1);
+  const int input_width = input_shape.Dims(2);
+  const int filter_height = filter_shape.Dims(1);
+  const int filter_width = filter_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_width = output_shape.Dims(2);
+  for (int batch = 0; batch < batches; ++batch) {
+    for (int out_y = 0; out_y < output_height; ++out_y) {
+      for (int out_x = 0; out_x < output_width; ++out_x) {
+        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
+          const int in_x_origin = (out_x * stride_width) - pad_width;
+          const int in_y_origin = (out_y * stride_height) - pad_height;
+          int32_t acc = 0;
+          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
+            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
+              for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
+                const int in_x = in_x_origin + dilation_width_factor * filter_x;
+                const int in_y =
+                    in_y_origin + dilation_height_factor * filter_y;
+                // If the location is outside the bounds of the input image,
+                // use zero as a default value.
+                if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
+                    (in_y < input_height)) {
+                  int32_t input_val = input_data[Offset(
+                      input_shape, batch, in_y, in_x, in_channel)];
+                  int32_t filter_val =
+                      filter_data[Offset(filter_shape, out_channel, filter_y,
+                                         filter_x, in_channel)];
+                  acc += filter_val * (input_val - input_offset[batch]);
+                }
+              }
+            }
+          }
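+          // Hybrid rescaling: the integer accumulator is converted back to
+          // float using the per-channel filter scale and the per-batch input
+          // scaling factor (input_offset above is likewise per-batch).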
+          float acc_float =
+              acc * per_channel_scale[out_channel] * scaling_factors_ptr[batch];
+          if (bias_data) {
+            acc_float += bias_data[out_channel];
+          }
+          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
+              ActivationFunctionWithMinMax(acc_float, output_activation_min,
+                                           output_activation_max);
+        }
+      }
+    }
+  }
+}
+
+}  // namespace reference_ops
+}  // namespace tflite
+
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_
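
To make the loop bounds above concrete: the reference Conv takes output_shape as given and zero-pads implicitly by skipping out-of-range taps, so the caller must supply a shape consistent with stride, dilation and padding. A minimal standalone sketch of that relation (not part of this diff; ConvOutputSize is a hypothetical helper):

#include <cstdio>

// Output extent of one spatial dimension for a strided, dilated convolution
// with symmetric padding, matching the in_x/in_y bounds checks above.
int ConvOutputSize(int in_size, int filter_size, int stride, int dilation,
                   int pad) {
  const int effective_filter = dilation * (filter_size - 1) + 1;
  return (in_size + 2 * pad - effective_filter) / stride + 1;
}

int main() {
  // 5-wide input, 3-wide filter, stride 1, dilation 1, no padding -> 3.
  std::printf("%d\n", ConvOutputSize(5, 3, 1, 1, 0));
}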

+ 100 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h

@@ -0,0 +1,100 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_FLOAT_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_FLOAT_H_
+
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/compatibility.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+namespace reference_ops {
+
+inline void DepthwiseConv(
+    const DepthwiseParams& params, const RuntimeShape& input_shape,
+    const float* input_data, const RuntimeShape& filter_shape,
+    const float* filter_data, const RuntimeShape& bias_shape,
+    const float* bias_data, const RuntimeShape& output_shape,
+    float* output_data) {
+  const int stride_width = params.stride_width;
+  const int stride_height = params.stride_height;
+  const int dilation_width_factor = params.dilation_width_factor;
+  const int dilation_height_factor = params.dilation_height_factor;
+  const int pad_width = params.padding_values.width;
+  const int pad_height = params.padding_values.height;
+  const int depth_multiplier = params.depth_multiplier;
+  const float output_activation_min = params.float_activation_min;
+  const float output_activation_max = params.float_activation_max;
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
+  const int input_height = input_shape.Dims(1);
+  const int input_width = input_shape.Dims(2);
+  const int input_depth = input_shape.Dims(3);
+  const int filter_height = filter_shape.Dims(1);
+  const int filter_width = filter_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_width = output_shape.Dims(2);
+  TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
+  TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
+
+  for (int b = 0; b < batches; ++b) {
+    for (int out_y = 0; out_y < output_height; ++out_y) {
+      for (int out_x = 0; out_x < output_width; ++out_x) {
+        for (int ic = 0; ic < input_depth; ++ic) {
+          for (int m = 0; m < depth_multiplier; m++) {
+            const int oc = m + ic * depth_multiplier;
+            const int in_x_origin = (out_x * stride_width) - pad_width;
+            const int in_y_origin = (out_y * stride_height) - pad_height;
+            float total = 0.f;
+            for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
+              for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
+                const int in_x = in_x_origin + dilation_width_factor * filter_x;
+                const int in_y =
+                    in_y_origin + dilation_height_factor * filter_y;
+                // If the location is outside the bounds of the input image,
+                // use zero as a default value.
+                if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
+                    (in_y < input_height)) {
+                  float input_value =
+                      input_data[Offset(input_shape, b, in_y, in_x, ic)];
+                  float filter_value = filter_data[Offset(
+                      filter_shape, 0, filter_y, filter_x, oc)];
+                  total += (input_value * filter_value);
+                }
+              }
+            }
+            float bias_value = 0.0f;
+            if (bias_data) {
+              bias_value = bias_data[oc];
+            }
+            output_data[Offset(output_shape, b, out_y, out_x, oc)] =
+                ActivationFunctionWithMinMax(total + bias_value,
+                                             output_activation_min,
+                                             output_activation_max);
+          }
+        }
+      }
+    }
+  }
+}
+
+}  // namespace reference_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_FLOAT_H_
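
The oc = m + ic * depth_multiplier mapping above is what distinguishes depthwise from ordinary convolution: each input channel feeds only its own group of depth_multiplier output channels. A standalone sketch of the mapping (illustration only, not TFLite code):

#include <cstdio>

int main() {
  const int input_depth = 3;
  const int depth_multiplier = 2;
  for (int ic = 0; ic < input_depth; ++ic) {
    for (int m = 0; m < depth_multiplier; ++m) {
      // Output channels come out grouped per input channel: oc runs 0..5
      // here, so output_depth == input_depth * depth_multiplier.
      std::printf("ic=%d m=%d -> oc=%d\n", ic, m, m + ic * depth_multiplier);
    }
  }
}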

+ 297 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h

@@ -0,0 +1,297 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_
+
+#include <algorithm>
+
+#include "fixedpoint/fixedpoint.h"
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/compatibility.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+
+// Used in tests and as a template parameter to control which version of
+// depthwise convolution is called. Primarily for reference code, and for
+// specializations forced in tests.
+enum class DepthwiseConvImplementation {
+  // Run all tests against kNone (the standard entry) even if also testing
+  // another kernel, since we need to be sure that the main DepthwiseConv()
+  // function in optimized_ops.h dispatches to a correctly-executing kernel.
+  kNone = 0,                 // The "default" option: use the normal
+                             // DepthwiseConv kernel (entry) function.
+  kUseGenericKernel,         // Forced use of generic kernel.
+  kUseNeon3x3,               // 3x3 kernel that uses NEON when available.
+  kUseNeon3x3DotProduct,     // 3x3 kernel that uses dot-product enabled NEON
+                             // when available.
+  kUseCModel3x3DotProduct,   // 3x3 kernel, reference C model intended to
+                             // match the overall design of the NEON code.
+  kUseUnwound3x3DotProduct,  // 3x3 kernel, reference C model with unwound
+                             // (unrolled) loops and some arrays.
+  kUseIntrinsics3x3DotProduct,  // 3x3 kernel using NEON intrinsics.
+};
+
+// Category of depthwise convolution output rounding.
+enum class DepthwiseConvOutputRounding {
+  kNone = 0,      // Invalid: specific method must be specified.
+  kAwayFromZero,  // Original method: exact halves rounded away from zero.
+  kUpward,        // Halves towards +infinity: adds 0.5 before truncate.
+  // This is where a future kNearestEven would be placed.
+};
+
+// Category of depthwise convolution depth multiplication.
+enum class DepthwiseConvDepthMultiplication {
+  kNoMultiplication = 0,  // Depth multiplier = 1.
+  kUnitInputDepth,        // Input depth = 1, output depth = depth multiplier.
+};
+
+namespace reference_ops {
+namespace depthwise_conv {
+
+template <DepthwiseConvOutputRounding output_rounding>
+inline int32_t DepthwiseConvRound(int32_t x, int32_t quantized_multiplier,
+                                  int shift) {
+  TFLITE_DCHECK_NE(output_rounding, DepthwiseConvOutputRounding::kNone);
+  return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
+}
+
+template <>
+inline int32_t DepthwiseConvRound<DepthwiseConvOutputRounding::kAwayFromZero>(
+    int32_t x, int32_t quantized_multiplier, int shift) {
+  return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
+}
+
+template <>
+inline int32_t DepthwiseConvRound<DepthwiseConvOutputRounding::kUpward>(
+    int32_t x, int32_t quantized_multiplier, int shift) {
+  using gemmlowp::SaturatingRoundingDoublingHighMul;
+  const int left_shift = shift > 0 ? shift : 0;
+  const int right_shift = shift > 0 ? 0 : -shift;
+  const int rounding_offset = right_shift > 0 ? 1 << (right_shift - 1) : 0;
+  return (SaturatingRoundingDoublingHighMul(x * (1 << left_shift),
+                                            quantized_multiplier) +
+          rounding_offset) >>
+         right_shift;
+}
+
+template <DepthwiseConvOutputRounding output_rounding>
+struct DepthwiseConvBasicKernel {
+  static inline void Run(
+      const DepthwiseParams& params, const RuntimeShape& input_shape,
+      const uint8_t* input_data, const RuntimeShape& filter_shape,
+      const uint8_t* filter_data, const RuntimeShape& bias_shape,
+      const int32_t* bias_data, const RuntimeShape& output_shape,
+      uint8_t* output_data) {
+    const int stride_width = params.stride_width;
+    const int stride_height = params.stride_height;
+    const int dilation_width_factor = params.dilation_width_factor;
+    const int dilation_height_factor = params.dilation_height_factor;
+    const int pad_width = params.padding_values.width;
+    const int pad_height = params.padding_values.height;
+    const int depth_multiplier = params.depth_multiplier;
+    const int32_t output_activation_min = params.quantized_activation_min;
+    const int32_t output_activation_max = params.quantized_activation_max;
+    const int32_t input_offset = params.input_offset;
+    const int32_t filter_offset = params.weights_offset;
+    const int32_t output_offset = params.output_offset;
+    const int32_t output_multiplier = params.output_multiplier;
+    const int output_shift = params.output_shift;
+    TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+    TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
+    TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+
+    TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+    const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+    const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
+    const int input_height = input_shape.Dims(1);
+    const int input_width = input_shape.Dims(2);
+    const int input_depth = input_shape.Dims(3);
+    const int filter_height = filter_shape.Dims(1);
+    const int filter_width = filter_shape.Dims(2);
+    const int output_height = output_shape.Dims(1);
+    const int output_width = output_shape.Dims(2);
+    TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
+    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
+
+    for (int b = 0; b < batches; ++b) {
+      for (int out_y = 0; out_y < output_height; ++out_y) {
+        for (int out_x = 0; out_x < output_width; ++out_x) {
+          for (int ic = 0; ic < input_depth; ++ic) {
+            for (int m = 0; m < depth_multiplier; m++) {
+              const int oc = m + ic * depth_multiplier;
+              const int in_x_origin = (out_x * stride_width) - pad_width;
+              const int in_y_origin = (out_y * stride_height) - pad_height;
+              int32_t acc = 0;
+              for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
+                for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
+                  const int in_x =
+                      in_x_origin + dilation_width_factor * filter_x;
+                  const int in_y =
+                      in_y_origin + dilation_height_factor * filter_y;
+                  // If the location is outside the bounds of the input image,
+                  // use zero as a default value.
+                  if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
+                      (in_y < input_height)) {
+                    int32_t input_val =
+                        input_data[Offset(input_shape, b, in_y, in_x, ic)];
+                    int32_t filter_val = filter_data[Offset(
+                        filter_shape, 0, filter_y, filter_x, oc)];
+                    acc += (filter_val + filter_offset) *
+                           (input_val + input_offset);
+                  }
+                }
+              }
+              if (bias_data) {
+                acc += bias_data[oc];
+              }
+              acc = DepthwiseConvRound<output_rounding>(acc, output_multiplier,
+                                                        output_shift);
+              acc += output_offset;
+              acc = std::max(acc, output_activation_min);
+              acc = std::min(acc, output_activation_max);
+              output_data[Offset(output_shape, b, out_y, out_x, oc)] =
+                  static_cast<uint8_t>(acc);
+            }
+          }
+        }
+      }
+    }
+  }
+
+  // TODO(b/148596273): Reconcile reference versions, perhaps with common
+  // MultiplyByQuantizedMultiplier or DepthwiseConvRound function.
+  static inline void RunPerChannel(
+      const DepthwiseParams& params, const RuntimeShape& input_shape,
+      const int8_t* input_data, const RuntimeShape& filter_shape,
+      const int8_t* filter_data, const RuntimeShape& bias_shape,
+      const int32_t* bias_data, const RuntimeShape& output_shape,
+      int8_t* output_data) {
+    // Get parameters.
+    // TODO(b/141565753): Re-introduce ScopedProfilingLabel on Micro.
+    const int stride_width = params.stride_width;
+    const int stride_height = params.stride_height;
+    const int dilation_width_factor = params.dilation_width_factor;
+    const int dilation_height_factor = params.dilation_height_factor;
+    const int pad_width = params.padding_values.width;
+    const int pad_height = params.padding_values.height;
+    const int depth_multiplier = params.depth_multiplier;
+    const int32_t input_offset = params.input_offset;
+    const int32_t output_offset = params.output_offset;
+    const int32_t output_activation_min = params.quantized_activation_min;
+    const int32_t output_activation_max = params.quantized_activation_max;
+    const int32_t* output_multiplier = params.output_multiplier_per_channel;
+    const int32_t* output_shift = params.output_shift_per_channel;
+
+    // Check dimensions of the tensors.
+    TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+    TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
+    TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+
+    TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+    const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+    const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
+    const int input_height = input_shape.Dims(1);
+    const int input_width = input_shape.Dims(2);
+    const int input_depth = input_shape.Dims(3);
+    const int filter_height = filter_shape.Dims(1);
+    const int filter_width = filter_shape.Dims(2);
+    const int output_height = output_shape.Dims(1);
+    const int output_width = output_shape.Dims(2);
+    TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
+    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
+
+    for (int batch = 0; batch < batches; ++batch) {
+      for (int out_y = 0; out_y < output_height; ++out_y) {
+        for (int out_x = 0; out_x < output_width; ++out_x) {
+          for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
+            for (int m = 0; m < depth_multiplier; ++m) {
+              const int output_channel = m + in_channel * depth_multiplier;
+              const int in_x_origin = (out_x * stride_width) - pad_width;
+              const int in_y_origin = (out_y * stride_height) - pad_height;
+              int32_t acc = 0;
+              for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
+                for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
+                  const int in_x =
+                      in_x_origin + dilation_width_factor * filter_x;
+                  const int in_y =
+                      in_y_origin + dilation_height_factor * filter_y;
+                  // Zero padding by omitting the areas outside the image.
+                  const bool is_point_inside_image =
+                      (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
+                      (in_y < input_height);
+                  if (is_point_inside_image) {
+                    int32_t input_val = input_data[Offset(
+                        input_shape, batch, in_y, in_x, in_channel)];
+                    int32_t filter_val = filter_data[Offset(
+                        filter_shape, 0, filter_y, filter_x, output_channel)];
+                    // Accumulate with a 32-bit accumulator.
+                    // In the nudging process during model quantization, we
+                    // force the real value 0.0 to be represented by a
+                    // quantized value. This guarantees that input_offset fits
+                    // in an int8_t, even though it is stored as an int32_t.
+                    // The accumulation is
+                    //   int32_t += int8_t * (int8_t - int8_t),
+                    // so the largest value each product can reach is
+                    // [-127, 127] * ([-128, 127] - [-128, 127]), which is
+                    // [-32512, 32512]. log2(32512) = 14.98, which means we
+                    // can accumulate at least 2^16 such products without
+                    // overflow. The accumulator runs over a single filter,
+                    // so this holds as long as the filter size (filter_y *
+                    // filter_x * in_channel) does not exceed 2^16, which is
+                    // the case in all the models we have seen so far.
+                    acc += filter_val * (input_val + input_offset);
+                  }
+                }
+              }
+              if (bias_data) {
+                acc += bias_data[output_channel];
+              }
+              acc = DepthwiseConvRound<output_rounding>(
+                  acc, output_multiplier[output_channel],
+                  output_shift[output_channel]);
+              acc += output_offset;
+              acc = std::max(acc, output_activation_min);
+              acc = std::min(acc, output_activation_max);
+              output_data[Offset(output_shape, batch, out_y, out_x,
+                                 output_channel)] = static_cast<int8_t>(acc);
+            }
+          }
+        }
+      }
+    }
+  }
+};
+
+}  // namespace depthwise_conv
+
+inline void DepthwiseConv(
+    const DepthwiseParams& params, const RuntimeShape& input_shape,
+    const uint8_t* input_data, const RuntimeShape& filter_shape,
+    const uint8_t* filter_data, const RuntimeShape& bias_shape,
+    const int32_t* bias_data, const RuntimeShape& output_shape,
+    uint8_t* output_data) {
+  return depthwise_conv::DepthwiseConvBasicKernel<
+      DepthwiseConvOutputRounding::kAwayFromZero>::Run(params, input_shape,
+                                                       input_data, filter_shape,
+                                                       filter_data, bias_shape,
+                                                       bias_data, output_shape,
+                                                       output_data);
+}
+
+}  // namespace reference_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_
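
The kAwayFromZero/kUpward distinction above only shows up on exact halves. A standalone sketch of the two behaviors on a plain divide-by-two, with the quantized multiplier and shift folded away for clarity (both helpers are hypothetical; the shift in RoundUpward assumes a two's-complement target, as the kernel code does):

#include <cstdio>

int RoundAwayFromZero(int x) {  // exact halves move away from zero
  return x >= 0 ? (x + 1) / 2 : (x - 1) / 2;
}

int RoundUpward(int x) {  // adds 0.5, then truncates toward -infinity
  return (x + 1) >> 1;
}

int main() {
  std::printf("%d %d\n", RoundAwayFromZero(5), RoundUpward(5));    // 3 3
  std::printf("%d %d\n", RoundAwayFromZero(-5), RoundUpward(-5));  // -3 -2
}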

+ 78 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/dequantize.h

@@ -0,0 +1,78 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEQUANTIZE_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEQUANTIZE_H_
+
+#include <limits.h>
+
+#include <vector>
+
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+
+namespace reference_ops {
+
+// Dequantizes into a float without rounding.
+template <typename InputT, typename OutputT>
+inline void Dequantize(const tflite::DequantizationParams& op_params,
+                       const RuntimeShape& input_shape,
+                       const InputT* input_data,
+                       const RuntimeShape& output_shape, OutputT* output_data) {
+  int32_t zero_point = op_params.zero_point;
+  const double scale = op_params.scale;
+  const int flat_size = MatchingFlatSize(input_shape, output_shape);
+
+  for (int i = 0; i < flat_size; i++) {
+    const int32_t val = input_data[i];
+    const OutputT result = static_cast<OutputT>(scale * (val - zero_point));
+    output_data[i] = result;
+  }
+}
+
+// Dequantizes per-channel quantized tensor to float.
+template <typename T>
+inline void PerChannelDequantize(
+    const tflite::PerChannelDequantizationParams& op_params,
+    const RuntimeShape& input_shape, const T* input_data,
+    const RuntimeShape& output_shape, float* output_data) {
+  // Ensure the input and output have the same flat size.
+  MatchingFlatSize(input_shape, output_shape);
+
+  const int32_t* zero_point = op_params.zero_point;
+  const float* scale = op_params.scale;
+  const int32_t quantized_dimension = op_params.quantized_dimension;
+  const int32_t num_dims = input_shape.DimensionsCount();
+  const int32_t* dims_data = input_shape.DimsData();
+  std::vector<int> current_dim(num_dims, 0);
+
+  do {
+    size_t offset =
+        ReducedOutputOffset(num_dims, reinterpret_cast<const int*>(dims_data),
+                            current_dim.data(), 0, nullptr);
+    const int channel = current_dim[quantized_dimension];
+    const int32_t val = input_data[offset];
+    const float result =
+        static_cast<float>(scale[channel] * (val - zero_point[channel]));
+    output_data[offset] = result;
+  } while (NextIndex(num_dims, reinterpret_cast<const int*>(dims_data),
+                     current_dim.data()));
+}
+
+}  // namespace reference_ops
+
+}  // namespace tflite
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEQUANTIZE_H_
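
Both paths above apply the same affine rule, result = scale * (val - zero_point), differing only in whether scale and zero_point are per-tensor or per-channel. A quick standalone numeric check (hand-picked values, illustration only):

#include <cstdio>

int main() {
  const float scale = 0.5f;
  const int zero_point = -128;  // e.g. an asymmetric range covering [0, 127.5]
  for (int val : {-128, 0, 127}) {
    std::printf("q=%4d -> %.1f\n", val, scale * (val - zero_point));
  }
  // q=-128 -> 0.0, q=0 -> 64.0, q=127 -> 127.5
}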

+ 39 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/floor.h

@@ -0,0 +1,39 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_H_
+
+#include <cmath>
+
+#include "tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+
+namespace reference_ops {
+
+inline void Floor(const RuntimeShape& input_shape, const float* input_data,
+                  const RuntimeShape& output_shape, float* output_data) {
+  const int flat_size = MatchingFlatSize(input_shape, output_shape);
+
+  for (int i = 0; i < flat_size; i++) {
+    int offset = i;
+    output_data[offset] = std::floor(input_data[offset]);
+  }
+}
+
+}  // namespace reference_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_H_

+ 320 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/fully_connected.h

@@ -0,0 +1,320 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FULLY_CONNECTED_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FULLY_CONNECTED_H_
+
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/cppmath.h"
+#include "tensorflow/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+namespace reference_ops {
+
+inline void FullyConnected(
+    const FullyConnectedParams& params, const RuntimeShape& input_shape,
+    const float* input_data, const RuntimeShape& weights_shape,
+    const float* weights_data, const RuntimeShape& bias_shape,
+    const float* bias_data, const RuntimeShape& output_shape,
+    float* output_data) {
+  const float output_activation_min = params.float_activation_min;
+  const float output_activation_max = params.float_activation_max;
+  // TODO(benoitjacob): This really should be:
+  //     const int batches = ArraySize(output_dims, 1);
+  // but the current --variable_batch hack consists in overwriting the 3rd
+  // dimension with the runtime batch size, as we don't keep track for each
+  // array of which dimension is the batch dimension in it.
+  const int output_dims_count = output_shape.DimensionsCount();
+  const int weights_dims_count = weights_shape.DimensionsCount();
+  const int batches = FlatSizeSkipDim(output_shape, output_dims_count - 1);
+  const int output_depth = MatchingDim(weights_shape, weights_dims_count - 2,
+                                       output_shape, output_dims_count - 1);
+  const int accum_depth = weights_shape.Dims(weights_dims_count - 1);
+  for (int b = 0; b < batches; ++b) {
+    for (int out_c = 0; out_c < output_depth; ++out_c) {
+      float total = 0.f;
+      for (int d = 0; d < accum_depth; ++d) {
+        total += input_data[b * accum_depth + d] *
+                 weights_data[out_c * accum_depth + d];
+      }
+      float bias_value = 0.0f;
+      if (bias_data) {
+        bias_value = bias_data[out_c];
+      }
+      output_data[out_c + output_depth * b] = ActivationFunctionWithMinMax(
+          total + bias_value, output_activation_min, output_activation_max);
+    }
+  }
+}
+
+inline void FullyConnected(
+    const FullyConnectedParams& params, const RuntimeShape& input_shape,
+    const uint8_t* input_data, const RuntimeShape& filter_shape,
+    const uint8_t* filter_data, const RuntimeShape& bias_shape,
+    const int32_t* bias_data, const RuntimeShape& output_shape,
+    uint8_t* output_data) {
+  const int32_t input_offset = params.input_offset;
+  const int32_t filter_offset = params.weights_offset;
+  const int32_t output_offset = params.output_offset;
+  const int32_t output_multiplier = params.output_multiplier;
+  const int output_shift = params.output_shift;
+  const int32_t output_activation_min = params.quantized_activation_min;
+  const int32_t output_activation_max = params.quantized_activation_max;
+  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
+  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
+
+  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+  // TODO(benoitjacob): This really should be:
+  //     const int batches = ArraySize(output_dims, 1);
+  // but the current --variable_batch hack consists in overwriting the 3rd
+  // dimension with the runtime batch size, as we don't keep track for each
+  // array of which dimension is the batch dimension in it.
+  const int output_dim_count = output_shape.DimensionsCount();
+  const int filter_dim_count = filter_shape.DimensionsCount();
+  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
+  const int output_depth = MatchingDim(filter_shape, filter_dim_count - 2,
+                                       output_shape, output_dim_count - 1);
+  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
+  for (int b = 0; b < batches; ++b) {
+    for (int out_c = 0; out_c < output_depth; ++out_c) {
+      int32_t acc = 0;
+      for (int d = 0; d < accum_depth; ++d) {
+        int32_t input_val = input_data[b * accum_depth + d];
+        int32_t filter_val = filter_data[out_c * accum_depth + d];
+        acc += (filter_val + filter_offset) * (input_val + input_offset);
+      }
+      if (bias_data) {
+        acc += bias_data[out_c];
+      }
+      acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+      acc += output_offset;
+      acc = std::max(acc, output_activation_min);
+      acc = std::min(acc, output_activation_max);
+      output_data[out_c + output_depth * b] = static_cast<uint8_t>(acc);
+    }
+  }
+}
+
+inline void FullyConnected(
+    const FullyConnectedParams& params, const RuntimeShape& input_shape,
+    const uint8_t* input_data, const RuntimeShape& filter_shape,
+    const uint8_t* filter_data, const RuntimeShape& bias_shape,
+    const int32_t* bias_data, const RuntimeShape& output_shape,
+    int16_t* output_data) {
+  const int32_t input_offset = params.input_offset;
+  const int32_t filter_offset = params.weights_offset;
+  const int32_t output_offset = params.output_offset;
+  const int32_t output_multiplier = params.output_multiplier;
+  const int output_shift = params.output_shift;
+  const int32_t output_activation_min = params.quantized_activation_min;
+  const int32_t output_activation_max = params.quantized_activation_max;
+
+  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+  TFLITE_DCHECK_EQ(output_offset, 0);
+  // TODO(benoitjacob): This really should be:
+  //     const int batches = ArraySize(output_dims, 1);
+  // but the current --variable_batch hack consists in overwriting the 3rd
+  // dimension with the runtime batch size, as we don't keep track for each
+  // array of which dimension is the batch dimension in it.
+  const int output_dim_count = output_shape.DimensionsCount();
+  const int filter_dim_count = filter_shape.DimensionsCount();
+  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
+  const int output_depth = MatchingDim(filter_shape, filter_dim_count - 2,
+                                       output_shape, output_dim_count - 1);
+  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
+  for (int b = 0; b < batches; ++b) {
+    for (int out_c = 0; out_c < output_depth; ++out_c) {
+      // Internal accumulation.
+      // Initialize accumulator with the bias-value.
+      int32_t accum = bias_data[out_c];
+      // Accumulation loop.
+      for (int d = 0; d < accum_depth; ++d) {
+        int16_t input_val = input_data[b * accum_depth + d] + input_offset;
+        int16_t filter_val =
+            filter_data[out_c * accum_depth + d] + filter_offset;
+        accum += filter_val * input_val;
+      }
+      // Down-scale the final int32_t accumulator to the scale used by our
+      // (16-bit, typically 3 integer bits) fixed-point format. The quantized
+      // multiplier and shift here have been pre-computed offline
+      // (e.g. by toco).
+      accum =
+          MultiplyByQuantizedMultiplier(accum, output_multiplier, output_shift);
+      // Saturate, cast to int16_t, and store to output array.
+      accum = std::max(accum, output_activation_min - output_offset);
+      accum = std::min(accum, output_activation_max - output_offset);
+      accum += output_offset;
+      output_data[out_c + output_depth * b] = accum;
+    }
+  }
+}
+
+inline void ShuffledFullyConnected(
+    const FullyConnectedParams& params, const RuntimeShape& input_shape,
+    const uint8_t* input_data, const RuntimeShape& weights_shape,
+    const uint8_t* shuffled_weights_data, const RuntimeShape& bias_shape,
+    const int32_t* bias_data, const RuntimeShape& output_shape,
+    int16_t* output_data, uint8_t* shuffled_input_workspace_data) {
+  const int32_t output_multiplier = params.output_multiplier;
+  const int output_shift = params.output_shift;
+  const int32_t output_activation_min = params.quantized_activation_min;
+  const int32_t output_activation_max = params.quantized_activation_max;
+  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+
+  TFLITE_DCHECK_GE(input_shape.DimensionsCount(), 1);
+  TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
+  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
+  // TODO(benoitjacob): This really should be:
+  //     const int batches = ArraySize(output_dims, 1);
+  // but the current --variable_batch hack consists in overwriting the 3rd
+  // dimension with the runtime batch size, as we don't keep track for each
+  // array of which dimension is the batch dimension in it.
+  const int output_dim_count = output_shape.DimensionsCount();
+  const int weights_dim_count = weights_shape.DimensionsCount();
+  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
+  const int output_depth = MatchingDim(weights_shape, weights_dim_count - 2,
+                                       output_shape, output_dim_count - 1);
+  const int accum_depth = weights_shape.Dims(weights_dim_count - 1);
+  TFLITE_DCHECK((accum_depth % 16) == 0);
+  TFLITE_DCHECK((output_depth % 4) == 0);
+
+  // Shuffling and xoring of input activations into the workspace buffer
+  uint8_t* shuffled_input_workspace_ptr = shuffled_input_workspace_data;
+  if (batches == 1) {
+    for (int i = 0; i < accum_depth; i++) {
+      shuffled_input_workspace_data[i] = input_data[i] ^ 0x80;
+    }
+  } else if (batches == 4) {
+    for (int c = 0; c < accum_depth; c += 16) {
+      for (int b = 0; b < 4; b++) {
+        const uint8_t* src_data_ptr = input_data + b * accum_depth + c;
+        for (int j = 0; j < 16; j++) {
+          uint8_t src_val = *src_data_ptr++;
+          // Flip the sign bit, so that the kernel will only need to
+          // reinterpret these uint8_t values as int8_t, getting for free the
+          // subtraction of the zero_point value 128.
+          uint8_t dst_val = src_val ^ 0x80;
+          *shuffled_input_workspace_ptr++ = dst_val;
+        }
+      }
+    }
+  } else {
+    TFLITE_DCHECK(false);
+    return;
+  }
+
+  // Actual computation
+  if (batches == 1) {
+    int16_t* output_ptr = output_data;
+    // Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd)
+    // so that just reinterpreting them as int8_t values is equivalent to
+    // subtracting 128 from them, thus implementing for free the subtraction of
+    // the zero_point value 128.
+    const int8_t* shuffled_weights_ptr =
+        reinterpret_cast<const int8_t*>(shuffled_weights_data);
+    // Likewise, we preshuffled and pre-xored the input data above.
+    const int8_t* shuffled_input_data =
+        reinterpret_cast<const int8_t*>(shuffled_input_workspace_data);
+    for (int c = 0; c < output_depth; c += 4) {
+      // Internal accumulation.
+      // Accumulators start at zero; the bias is added after the loop.
+      int32_t accum[4] = {0};
+      // Accumulation loop.
+      for (int d = 0; d < accum_depth; d += 16) {
+        for (int i = 0; i < 4; i++) {
+          for (int j = 0; j < 16; j++) {
+            int8_t input_val = shuffled_input_data[d + j];
+            int8_t weights_val = *shuffled_weights_ptr++;
+            accum[i] += weights_val * input_val;
+          }
+        }
+      }
+      for (int i = 0; i < 4; i++) {
+        // Add bias value
+        int32_t acc = accum[i] + bias_data[c + i];
+        // Down-scale the final int32_t accumulator to the scale used by our
+        // (16-bit, typically 3 integer bits) fixed-point format. The quantized
+        // multiplier and shift here have been pre-computed offline
+        // (e.g. by toco).
+        acc =
+            MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+        // Saturate, cast to int16_t, and store to output array.
+        acc = std::max(acc, output_activation_min);
+        acc = std::min(acc, output_activation_max);
+        output_ptr[c + i] = acc;
+      }
+    }
+  } else if (batches == 4) {
+    int16_t* output_ptr = output_data;
+    // Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd)
+    // so that just reinterpreting them as int8_t values is equivalent to
+    // subtracting 128 from them, thus implementing for free the subtraction of
+    // the zero_point value 128.
+    const int8_t* shuffled_weights_ptr =
+        reinterpret_cast<const int8_t*>(shuffled_weights_data);
+    // Likewise, we preshuffled and pre-xored the input data above.
+    const int8_t* shuffled_input_data =
+        reinterpret_cast<const int8_t*>(shuffled_input_workspace_data);
+    for (int c = 0; c < output_depth; c += 4) {
+      const int8_t* shuffled_input_ptr = shuffled_input_data;
+      // Internal accumulation.
+      // Accumulators start at zero; the bias is added after the
+      // accumulation loop.
+      int32_t accum[4][4];
+      for (int i = 0; i < 4; i++) {
+        for (int b = 0; b < 4; b++) {
+          accum[i][b] = 0;
+        }
+      }
+      for (int d = 0; d < accum_depth; d += 16) {
+        for (int i = 0; i < 4; i++) {
+          for (int b = 0; b < 4; b++) {
+            for (int j = 0; j < 16; j++) {
+              int8_t input_val = shuffled_input_ptr[16 * b + j];
+              int8_t weights_val = shuffled_weights_ptr[16 * i + j];
+              accum[i][b] += weights_val * input_val;
+            }
+          }
+        }
+        shuffled_input_ptr += 64;
+        shuffled_weights_ptr += 64;
+      }
+      for (int i = 0; i < 4; i++) {
+        for (int b = 0; b < 4; b++) {
+          // Add bias value
+          int32_t acc = accum[i][b] + bias_data[c + i];
+          // Down-scale the final int32_t accumulator to the scale used by our
+          // (16-bit, typically 3 integer bits) fixed-point format. The
+          // quantized multiplier and shift here have been pre-computed offline
+          // (e.g. by toco).
+          acc = MultiplyByQuantizedMultiplier(acc, output_multiplier,
+                                              output_shift);
+          // Saturate, cast to int16_t, and store to output array.
+          acc = std::max(acc, output_activation_min);
+          acc = std::min(acc, output_activation_max);
+          output_ptr[b * output_depth + c + i] = acc;
+        }
+      }
+    }
+  } else {
+    TFLITE_DCHECK(false);
+    return;
+  }
+}
+
+}  // namespace reference_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FULLY_CONNECTED_H_
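
The 0x80 trick used by ShuffledFullyConnected above rests on two's-complement reinterpretation: flipping the sign bit of a uint8_t and reading the result back as int8_t is the same as subtracting the zero point 128. A standalone sketch (illustration only; the int8_t cast assumes a two's-complement target, as the kernel does):

#include <cstdint>
#include <cstdio>

int main() {
  for (uint8_t v : {uint8_t{0}, uint8_t{128}, uint8_t{255}}) {
    const int8_t flipped = static_cast<int8_t>(v ^ 0x80);
    std::printf("u8=%3d -> i8=%4d, v - 128 = %4d\n", static_cast<int>(v),
                flipped, v - 128);
  }
  // 0 -> -128, 128 -> 0, 255 -> 127: identical to subtracting 128.
}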

+ 166 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/hard_swish.h

@@ -0,0 +1,166 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_HARD_SWISH_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_HARD_SWISH_H_
+
+#include "ruy/profiler/instrumentation.h"  // from @ruy
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+namespace reference_ops {
+
+inline int16_t SaturatingLeftShift(int16_t value, int amount) {
+  int32_t result = static_cast<int32_t>(value) * (1 << amount);
+  result = std::min<int32_t>(result, std::numeric_limits<int16_t>::max());
+  result = std::max<int32_t>(result, std::numeric_limits<int16_t>::min());
+  return result;
+}
+
+// Similar to ARM instruction SQDMULH.
+// Similar to gemmlowp::SaturatingRoundingDoublingHighMul except
+// rounding to zero instead of to nearest (SQRDMULH).
+inline std::int16_t SaturatingDoublingHighMul(std::int16_t a, std::int16_t b) {
+  bool overflow = a == b && a == std::numeric_limits<std::int16_t>::min();
+  std::int32_t a_32(a);
+  std::int32_t b_32(b);
+  std::int32_t ab_32 = a_32 * b_32;
+  std::int16_t ab_x2_high16 = static_cast<std::int16_t>((ab_32) / (1 << 15));
+  return overflow ? std::numeric_limits<std::int16_t>::max() : ab_x2_high16;
+}
+
+template <typename T>
+inline void HardSwish(const RuntimeShape& input_shape, const T* input_data,
+                      const RuntimeShape& output_shape, T* output_data) {
+  ruy::profiler::ScopeLabel label("ReferenceHardSwish/Float");
+  auto matching_size = MatchingFlatSize(input_shape, output_shape);
+  const T* in_end = input_data + matching_size;
+  for (; input_data < in_end; input_data++, output_data++) {
+    const float in = *input_data;
+    *output_data =
+        in * std::min(static_cast<T>(6), std::max(static_cast<T>(0), in + 3)) /
+        6;
+  }
+}
+
+template <typename T>
+inline void HardSwish(const HardSwishParams& params,
+                      const RuntimeShape& input_shape, const T* input_data,
+                      const RuntimeShape& output_shape, T* output_data) {
+  ruy::profiler::ScopeLabel label("ReferenceHardSwish/Quantized");
+
+  const int flat_size = MatchingFlatSize(input_shape, output_shape);
+
+  for (int i = 0; i < flat_size; i++) {
+    const int16_t input_value = input_data[i] - params.input_zero_point;
+    // Left-shift as much as we can without overflow/saturation to put
+    // significant bits in the high bits of our 16-bit fixedpoint values, so
+    // that fixed-point approximate computations below are as accurate as
+    // possible.
+    const int16_t input_value_on_hires_input_scale = input_value * (1 << 7);
+    // Compute the input value on essentially the output scale, just not
+    // right-shifted yet. This is the value that we'll use in the (x >= +3)
+    // case, and that in the general case we'll multiply against the "relu-ish"
+    // fixed-point multiplier in [0, 1].
+    const int16_t input_value_on_preshift_output_scale =
+        gemmlowp::SaturatingRoundingDoublingHighMul(
+            input_value_on_hires_input_scale,
+            params.output_multiplier_fixedpoint_int16);
+    // Now compute the "relu-ish multiplier". In the (-3 <= x <= +3) case, that
+    // is just an affine rescaling of x from [-3, 3] to [0, 1]. In the general
+    // case, it is just that plus saturation at the boundaries of [-3, 3].
+    // First, we rescale from [-3, 3] to [-1, 1], saturating.
+    // That is done by rescaling the input value with a fixed-point multiplier
+    // (reluish_multiplier_fixedpoint) and bit-shift such that we represent
+    // that input value on the scale where the real value 3.0f is represented
+    // by the quantized value 32768.  (+32768 is actually not representable as
+    // int16_t, so this saturates at +32767, and that is seen empirically to be
+    // a negligible contribution to numerical error/bias).
+    //
+    // This code is careful to correctly implement any magnitude of multiplier,
+    // involving either a right shift or a left shift, with correct saturation
+    // behavior in the left-shift case. This forces this code to be more
+    // complicated, but is necessary for real applications: a partially
+    // trained quantized MobileNet v3-small model that motivated this code
+    // exhibits some large [min, max] range boundaries, of the order of
+    // magnitude of 10 or 100 depending on layers.
+    //
+    // The next few lines are basically just an ordinary
+    // MultiplyByQuantizedMultiplier, except that we are more careful here
+    // about the fine details of saturation when left-shifting, because here
+    // overflow in left-shift is a common case, not an anomaly as
+    // MultiplyByQuantizedMultiplier assumes.
+    int16_t reluish_value = input_value_on_hires_input_scale;
+    // Shift left, saturating, as much as we can while ensuring that this
+    // saturation will not contribute to the result. That is, left shift amount
+    // reduced by 1.
+    if (params.reluish_multiplier_exponent > 0) {
+      reluish_value = SaturatingLeftShift(
+          reluish_value, params.reluish_multiplier_exponent - 1);
+    }
+    // Apply the fixed-point multiplier, dividing the value by a divisor
+    // ranging in [1, 2].
+    reluish_value = gemmlowp::SaturatingRoundingDoublingHighMul(
+        reluish_value, params.reluish_multiplier_fixedpoint_int16);
+    // Apply the last bit of left-shift. Thus, in the left-shifting case, if
+    // any saturation affects the result, it is happening here --- any
+    // saturation having occurred above is overwritten here, not affecting the
+    // result.
+    if (params.reluish_multiplier_exponent > 0) {
+      reluish_value = SaturatingLeftShift(reluish_value, 1);
+    }
+    // Shift right, in the right-shifting case.
+    if (params.reluish_multiplier_exponent < 0) {
+      reluish_value = gemmlowp::RoundingDivideByPOT(
+          reluish_value, -params.reluish_multiplier_exponent);
+    }
+    // At this point we have rescaled the value into a 16bit fixedpoint
+    // reluish_value in [-1, 1].
+    // We now convert that to a 16bit fixedpoint value in [0, 1].
+    reluish_value = (reluish_value + (1 << 15)) >> 1;
+    // Use of SaturatingDoublingHighMul here is important to cancel the biases
+    // from the above SaturatingRoundingDoublingHighMul.
+    //
+    // On a partially trained MobileNet-v3-small,
+    //
+    //                                       | bias on    |  ImageNet
+    //                                       | quantized  |  Top-1
+    // Operation used here                   | values     |  accuracy (50k)
+    // --------------------------------------+------------+-----------
+    // SaturatingDoublingHighMul             | -0.0024    |  58.920
+    // SaturatingRoundingDoublingHighMul     | -0.0067    |  58.064
+    //
+    // In activations_test, this is covered by this testcase:
+    //     QuantizedActivationsOpTest.HardSwishBias
+    //
+    const int16_t preshift_output_value = SaturatingDoublingHighMul(
+        reluish_value, input_value_on_preshift_output_scale);
+    // We were so far operating on the pre-shift output scale. Now we finally
+    // apply that output shift, arriving at the final output scale.
+    int16_t output_value = gemmlowp::RoundingDivideByPOT(
+        preshift_output_value, -params.output_multiplier_exponent);
+    output_value += params.output_zero_point;
+    output_value =
+        std::min<int16_t>(output_value, std::numeric_limits<T>::max());
+    output_value =
+        std::max<int16_t>(output_value, std::numeric_limits<T>::min());
+    output_data[i] = output_value;
+  }
+}
+
+}  // namespace reference_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_
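
As a standalone illustration of the shift-multiply-shift decomposition above, here is a minimal sketch with simplified local stand-ins for gemmlowp::SaturatingLeftShift and gemmlowp::SaturatingRoundingDoublingHighMul; the input value, exponent, and multiplier are made up:

// Minimal sketch of the left-shift decomposition used by the hard-swish
// kernel above. The helpers are simplified stand-ins, not the gemmlowp ones.
#include <algorithm>
#include <cstdint>
#include <iostream>

int16_t SaturatingLeftShiftSketch(int16_t value, int amount) {
  const int32_t shifted = static_cast<int32_t>(value) * (1 << amount);
  return static_cast<int16_t>(
      std::min<int32_t>(std::max<int32_t>(shifted, INT16_MIN), INT16_MAX));
}

// Rounding doubling high multiply: saturate((2 * a * b + 2^15) >> 16).
int16_t DoublingHighMulSketch(int16_t a, int16_t b) {
  const int64_t product = 2LL * a * b + (1LL << 15);
  return static_cast<int16_t>(std::min<int64_t>(
      std::max<int64_t>(product >> 16, INT16_MIN), INT16_MAX));
}

int main() {
  const int16_t input = 20000;       // Large enough that the shifts saturate.
  const int exponent = 2;            // Total requested left shift.
  const int16_t multiplier = 24000;  // Q0.15 value in [0.5, 1): about 0.73.
  // Step 1: shift by exponent - 1. If this saturates, the final one-bit
  // shift would saturate anyway, so the result is unaffected.
  int16_t v = SaturatingLeftShiftSketch(input, exponent - 1);
  // Step 2: the multiply divides the value by a divisor in [1, 2].
  v = DoublingHighMulSketch(v, multiplier);
  // Step 3: the last one-bit shift, where any genuine saturation happens.
  v = SaturatingLeftShiftSketch(v, 1);
  std::cout << v << "\n";  // 20000 * 4 * 0.73... overflows int16: prints 32767.
}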

+ 145 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/integer_ops/add.h

@@ -0,0 +1,145 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_
+
+#include <limits>
+
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+namespace reference_integer_ops {
+
+inline void CheckArithmeticParams(const ArithmeticParams& params) {
+  TFLITE_DCHECK_LE(params.quantized_activation_min,
+                   params.quantized_activation_max);
+  // The input offset is the negated input zero point. Activation tensors are
+  // asymmetrically quantized, so they span the full int8 range.
+  TFLITE_DCHECK_GE(-params.input1_offset, std::numeric_limits<int8_t>::min());
+  TFLITE_DCHECK_GE(-params.input2_offset, std::numeric_limits<int8_t>::min());
+  TFLITE_DCHECK_LE(-params.input1_offset, std::numeric_limits<int8_t>::max());
+  TFLITE_DCHECK_LE(-params.input2_offset, std::numeric_limits<int8_t>::max());
+}
+
+// Element-wise add that can often be used for the inner loop of a broadcast
+// add as well as for the non-broadcast add.
+inline void AddElementwise(int size, const ArithmeticParams& params,
+                           const int8_t* input1_data, const int8_t* input2_data,
+                           int8_t* output_data) {
+  CheckArithmeticParams(params);
+
+  for (int i = 0; i < size; ++i) {
+    const int32_t input1_val = params.input1_offset + input1_data[i];
+    const int32_t input2_val = params.input2_offset + input2_data[i];
+    const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
+    const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
+    const int32_t scaled_input1_val =
+        MultiplyByQuantizedMultiplierSmallerThanOneExp(
+            shifted_input1_val, params.input1_multiplier, params.input1_shift);
+    const int32_t scaled_input2_val =
+        MultiplyByQuantizedMultiplierSmallerThanOneExp(
+            shifted_input2_val, params.input2_multiplier, params.input2_shift);
+    const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
+    const int32_t raw_output =
+        MultiplyByQuantizedMultiplierSmallerThanOneExp(
+            raw_sum, params.output_multiplier, params.output_shift) +
+        params.output_offset;
+    const int32_t clamped_output =
+        std::min(params.quantized_activation_max,
+                 std::max(params.quantized_activation_min, raw_output));
+    output_data[i] = static_cast<int8_t>(clamped_output);
+  }
+}
+
+inline void Add(const ArithmeticParams& params,
+                const RuntimeShape& input1_shape, const int8_t* input1_data,
+                const RuntimeShape& input2_shape, const int8_t* input2_data,
+                const RuntimeShape& output_shape, int8_t* output_data) {
+  CheckArithmeticParams(params);
+
+  const int flat_size =
+      MatchingElementsSize(input1_shape, input2_shape, output_shape);
+
+  AddElementwise(flat_size, params, input1_data, input2_data, output_data);
+}
+
+inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
+                               const RuntimeShape& input1_shape,
+                               const int8_t* input1_data,
+                               const RuntimeShape& input2_shape,
+                               const int8_t* input2_data,
+                               const RuntimeShape& output_shape,
+                               int8_t* output_data) {
+  NdArrayDesc<4> desc1;
+  NdArrayDesc<4> desc2;
+  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
+                                      &desc2);
+  const RuntimeShape extended_output_shape =
+      RuntimeShape::ExtendedShape(4, output_shape);
+
+  // In TensorFlow, the dimensions are canonically named (batch_number, row,
+  // col, channel), with extents (batches, height, width, depth); the trailing
+  // dimension changes most rapidly (channels has the smallest stride,
+  // typically 1 element).
+  //
+  // In generated C code, we store arrays with the dimensions reversed: the
+  // first dimension has the smallest stride.
+  //
+  // We name our variables by the TensorFlow convention, but generate C code
+  // nesting loops such that the innermost loop has the smallest stride, for
+  // the best cache behavior.
+  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
+    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
+      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
+        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
+          const int32_t input1_val =
+              params.input1_offset +
+              input1_data[SubscriptToIndex(desc1, b, y, x, c)];
+          const int32_t input2_val =
+              params.input2_offset +
+              input2_data[SubscriptToIndex(desc2, b, y, x, c)];
+          const int32_t shifted_input1_val =
+              input1_val * (1 << params.left_shift);
+          const int32_t shifted_input2_val =
+              input2_val * (1 << params.left_shift);
+          const int32_t scaled_input1_val =
+              MultiplyByQuantizedMultiplierSmallerThanOneExp(
+                  shifted_input1_val, params.input1_multiplier,
+                  params.input1_shift);
+          const int32_t scaled_input2_val =
+              MultiplyByQuantizedMultiplierSmallerThanOneExp(
+                  shifted_input2_val, params.input2_multiplier,
+                  params.input2_shift);
+          const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
+          const int32_t raw_output =
+              MultiplyByQuantizedMultiplierSmallerThanOneExp(
+                  raw_sum, params.output_multiplier, params.output_shift) +
+              params.output_offset;
+          const int32_t clamped_output =
+              std::min(params.quantized_activation_max,
+                       std::max(params.quantized_activation_min, raw_output));
+          output_data[Offset(extended_output_shape, b, y, x, c)] =
+              static_cast<int8_t>(clamped_output);
+        }
+      }
+    }
+  }
+}
+
+}  // namespace reference_integer_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_
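
To make the rescaling pipeline concrete, here is a self-contained numeric sketch under the assumption that both inputs and the output share one scale and all offsets are zero, so the quantized sum should simply equal q1 + q2. RescaleSketch is a simplified stand-in for MultiplyByQuantizedMultiplierSmallerThanOneExp, and the multiplier/shift values are hand-derived for this special case:

// Worked example of the Add rescaling above with equal scales and zero
// offsets. Multipliers are Q31 fixed-point values; shifts are <= 0.
#include <cstdint>
#include <iostream>

int32_t RescaleSketch(int32_t x, int32_t multiplier, int shift) {
  const int64_t prod = static_cast<int64_t>(x) * multiplier;
  const int total = 31 - shift;  // Q31 product, then the extra right shift.
  return static_cast<int32_t>((prod + (int64_t{1} << (total - 1))) >> total);
}

int main() {
  const int left_shift = 20;       // Headroom; 20 is typical for int8 Add.
  const int32_t q1 = 10, q2 = 40;  // Quantized inputs, offsets already added.
  // With equal scales, both input multipliers encode 0.5 (1 << 30, shift 0)
  // and the output multiplier encodes 2^-19 (1 << 30, shift -18).
  const int32_t scaled1 = RescaleSketch(q1 << left_shift, 1 << 30, 0);
  const int32_t scaled2 = RescaleSketch(q2 << left_shift, 1 << 30, 0);
  const int32_t out = RescaleSketch(scaled1 + scaled2, 1 << 30, -18);
  std::cout << out << "\n";  // Prints 50 == q1 + q2.
}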

+ 217 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h

@@ -0,0 +1,217 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_
+
+#include "tensorflow/lite/kernels/internal/common.h"
+
+namespace tflite {
+namespace reference_integer_ops {
+
+// Fixed-point per-channel-quantization convolution reference kernel.
+inline void ConvPerChannel(
+    const ConvParams& params, const int32_t* output_multiplier,
+    const int32_t* output_shift, const RuntimeShape& input_shape,
+    const int8_t* input_data, const RuntimeShape& filter_shape,
+    const int8_t* filter_data, const RuntimeShape& bias_shape,
+    const int32_t* bias_data, const RuntimeShape& output_shape,
+    int8_t* output_data) {
+  // Get parameters.
+  const int32_t input_offset = params.input_offset;  // r = s(q - Z)
+  const int stride_width = params.stride_width;
+  const int stride_height = params.stride_height;
+  const int dilation_width_factor = params.dilation_width_factor;
+  const int dilation_height_factor = params.dilation_height_factor;
+  const int pad_width = params.padding_values.width;
+  const int pad_height = params.padding_values.height;
+  const int32_t output_offset = params.output_offset;
+
+  // Set min and max value of the output.
+  const int32_t output_activation_min = params.quantized_activation_min;
+  const int32_t output_activation_max = params.quantized_activation_max;
+
+  // Consistency check.
+  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
+  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
+  if (bias_data) {
+    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
+  }
+
+  // Check dimensions of the tensors.
+  const int input_height = input_shape.Dims(1);
+  const int input_width = input_shape.Dims(2);
+  const int filter_height = filter_shape.Dims(1);
+  const int filter_width = filter_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_width = output_shape.Dims(2);
+  for (int batch = 0; batch < batches; ++batch) {
+    for (int out_y = 0; out_y < output_height; ++out_y) {
+      for (int out_x = 0; out_x < output_width; ++out_x) {
+        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
+          const int in_x_origin = (out_x * stride_width) - pad_width;
+          const int in_y_origin = (out_y * stride_height) - pad_height;
+          int32_t acc = 0;
+          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
+            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
+              for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
+                const int in_x = in_x_origin + dilation_width_factor * filter_x;
+                const int in_y =
+                    in_y_origin + dilation_height_factor * filter_y;
+                // Zero padding by omitting the areas outside the image.
+                const bool is_point_inside_image =
+                    (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
+                    (in_y < input_height);
+                if (is_point_inside_image) {
+                  int32_t input_val = input_data[Offset(
+                      input_shape, batch, in_y, in_x, in_channel)];
+                  int32_t filter_val =
+                      filter_data[Offset(filter_shape, out_channel, filter_y,
+                                         filter_x, in_channel)];
+                  // Accumulate with a 32-bit accumulator.
+                  // The nudging process during model quantization forces the
+                  // real value 0.0 to be exactly representable by a quantized
+                  // value, which guarantees that input_offset fits in an
+                  // int8_t even though it is stored as an int32_t. Each step
+                  // computes int32_t += int8_t * (int8_t - int8_t), so the
+                  // largest magnitude per accumulation is
+                  // [-127, 127] * ([-128, 127] - [-128, 127]) =
+                  // [-32512, 32512]. log2(32512) = 14.98, so we can
+                  // accumulate at least 2^16 multiplications without
+                  // overflow. The accumulator runs over one filter, so this
+                  // holds as long as the filter size
+                  // (filter_y * filter_x * in_channel) does not exceed 2^16,
+                  // which is the case in all the models we have seen so far.
+                  // TODO(jianlijianli): Add a check to make sure the
+                  // accumulator depth is smaller than 2^16.
+                  acc += filter_val * (input_val + input_offset);
+                }
+              }
+            }
+          }
+
+          if (bias_data) {
+            acc += bias_data[out_channel];
+          }
+          acc = MultiplyByQuantizedMultiplier(
+              acc, output_multiplier[out_channel], output_shift[out_channel]);
+          acc += output_offset;
+          acc = std::max(acc, output_activation_min);
+          acc = std::min(acc, output_activation_max);
+          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
+              static_cast<int8_t>(acc);
+        }
+      }
+    }
+  }
+}
+
+// Fixed-point per-channel-quantization convolution reference kernel.
+// 16-bit data and 8-bit filters.
+inline void ConvPerChannel(
+    const ConvParams& params, const int32_t* output_multiplier,
+    const int32_t* output_shift, const RuntimeShape& input_shape,
+    const int16_t* input_data, const RuntimeShape& filter_shape,
+    const int8_t* filter_data, const RuntimeShape& bias_shape,
+    const std::int64_t* bias_data, const RuntimeShape& output_shape,
+    int16_t* output_data) {
+  // Get parameters.
+  const int stride_width = params.stride_width;
+  const int stride_height = params.stride_height;
+  const int dilation_width_factor = params.dilation_width_factor;
+  const int dilation_height_factor = params.dilation_height_factor;
+  const int pad_width = params.padding_values.width;
+  const int pad_height = params.padding_values.height;
+
+  // Set min and max value of the output.
+  const int32_t output_activation_min = params.quantized_activation_min;
+  const int32_t output_activation_max = params.quantized_activation_max;
+
+  // Consistency check.
+  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
+  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
+  if (bias_data) {
+    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
+  }
+
+  // Check dimensions of the tensors.
+  const int input_height = input_shape.Dims(1);
+  const int input_width = input_shape.Dims(2);
+  const int filter_height = filter_shape.Dims(1);
+  const int filter_width = filter_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_width = output_shape.Dims(2);
+  for (int batch = 0; batch < batches; ++batch) {
+    for (int out_y = 0; out_y < output_height; ++out_y) {
+      for (int out_x = 0; out_x < output_width; ++out_x) {
+        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
+          const int in_x_origin = (out_x * stride_width) - pad_width;
+          const int in_y_origin = (out_y * stride_height) - pad_height;
+          std::int64_t acc = 0;
+          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
+            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
+              for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
+                const int in_x = in_x_origin + dilation_width_factor * filter_x;
+                const int in_y =
+                    in_y_origin + dilation_height_factor * filter_y;
+                // Zero padding by omitting the areas outside the image.
+                const bool is_point_inside_image =
+                    (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
+                    (in_y < input_height);
+                if (is_point_inside_image) {
+                  int32_t input_val = input_data[Offset(
+                      input_shape, batch, in_y, in_x, in_channel)];
+                  int32_t filter_val =
+                      filter_data[Offset(filter_shape, out_channel, filter_y,
+                                         filter_x, in_channel)];
+                  // Accumulate with a 64-bit accumulator.
+                  // Each step computes int64_t += int8_t * int16_t; 16-bit
+                  // activations are symmetrically quantized (zero offset), so
+                  // there is no offset term. The largest magnitude per
+                  // accumulation is 128 * 32768 = 2^22, leaving ample
+                  // headroom in 64 bits.
+                  acc += filter_val * input_val;
+                }
+              }
+            }
+          }
+          if (bias_data) {
+            acc += bias_data[out_channel];
+          }
+          int32_t scaled_acc = MultiplyByQuantizedMultiplier(
+              acc, output_multiplier[out_channel], output_shift[out_channel]);
+          scaled_acc = std::max(scaled_acc, output_activation_min);
+          scaled_acc = std::min(scaled_acc, output_activation_max);
+          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
+              static_cast<int16_t>(scaled_acc);
+        }
+      }
+    }
+  }
+}
+
+}  // namespace reference_integer_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_
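
The accumulator-range argument in the comment above can be restated as a compile-time check. This fragment is illustrative only (the constant names are invented); it compiles as-is and simply re-verifies that 2^16 products of the quoted magnitude fit in an int32_t:

// Compile-time restatement of the int8 accumulator-range argument above.
#include <cstdint>

constexpr int64_t kMaxAbsProduct = 32512;        // Per-step bound quoted above.
constexpr int64_t kMaxSteps = int64_t{1} << 16;  // Assumed maximum filter size.
static_assert(kMaxAbsProduct * kMaxSteps <= INT32_MAX,
              "2^16 int8 products fit in an int32_t accumulator");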

+ 289 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h

@@ -0,0 +1,289 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_
+
+#include "tensorflow/lite/kernels/internal/common.h"
+
+namespace tflite {
+namespace reference_integer_ops {
+inline void DepthwiseConvPerChannel(
+    const DepthwiseParams& params, const int32_t* output_multiplier,
+    const int32_t* output_shift, const RuntimeShape& input_shape,
+    const int8_t* input_data, const RuntimeShape& filter_shape,
+    const int8_t* filter_data, const RuntimeShape& bias_shape,
+    const int32_t* bias_data, const RuntimeShape& output_shape,
+    int8_t* output_data) {
+  // Get parameters.
+  // TODO(b/141565753): Re-introduce ScopedProfilingLabel on Micro.
+  const int stride_width = params.stride_width;
+  const int stride_height = params.stride_height;
+  const int dilation_width_factor = params.dilation_width_factor;
+  const int dilation_height_factor = params.dilation_height_factor;
+  const int pad_width = params.padding_values.width;
+  const int pad_height = params.padding_values.height;
+  const int depth_multiplier = params.depth_multiplier;
+  const int32_t input_offset = params.input_offset;
+  const int32_t output_offset = params.output_offset;
+  const int32_t output_activation_min = params.quantized_activation_min;
+  const int32_t output_activation_max = params.quantized_activation_max;
+
+  // Check dimensions of the tensors.
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+
+  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
+  const int input_height = input_shape.Dims(1);
+  const int input_width = input_shape.Dims(2);
+  const int input_depth = input_shape.Dims(3);
+  const int filter_height = filter_shape.Dims(1);
+  const int filter_width = filter_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_width = output_shape.Dims(2);
+  TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
+  TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
+
+  for (int batch = 0; batch < batches; ++batch) {
+    for (int out_y = 0; out_y < output_height; ++out_y) {
+      for (int out_x = 0; out_x < output_width; ++out_x) {
+        for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
+          for (int m = 0; m < depth_multiplier; ++m) {
+            const int output_channel = m + in_channel * depth_multiplier;
+            const int in_x_origin = (out_x * stride_width) - pad_width;
+            const int in_y_origin = (out_y * stride_height) - pad_height;
+            int32_t acc = 0;
+            for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
+              for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
+                const int in_x = in_x_origin + dilation_width_factor * filter_x;
+                const int in_y =
+                    in_y_origin + dilation_height_factor * filter_y;
+                // Zero padding by omitting the areas outside the image.
+                const bool is_point_inside_image =
+                    (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
+                    (in_y < input_height);
+                if (is_point_inside_image) {
+                  int32_t input_val = input_data[Offset(
+                      input_shape, batch, in_y, in_x, in_channel)];
+                  int32_t filter_val = filter_data[Offset(
+                      filter_shape, 0, filter_y, filter_x, output_channel)];
+                  // Accumulate with a 32-bit accumulator.
+                  // The nudging process during model quantization forces the
+                  // real value 0.0 to be exactly representable by a quantized
+                  // value, which guarantees that input_offset fits in an
+                  // int8_t even though it is stored as an int32_t. Each step
+                  // computes int32_t += int8_t * (int8_t - int8_t), so the
+                  // largest magnitude per accumulation is
+                  // [-127, 127] * ([-128, 127] - [-128, 127]) =
+                  // [-32512, 32512]. log2(32512) = 14.98, so we can
+                  // accumulate at least 2^16 multiplications without
+                  // overflow. The accumulator runs over one filter, so this
+                  // holds as long as the filter size
+                  // (filter_y * filter_x * in_channel) does not exceed 2^16,
+                  // which is the case in all the models we have seen so far.
+                  // TODO(jianlijianli): Add a check to make sure the
+                  // accumulator depth is smaller than 2^16.
+                  acc += filter_val * (input_val + input_offset);
+                }
+              }
+            }
+            if (bias_data) {
+              acc += bias_data[output_channel];
+            }
+            acc = MultiplyByQuantizedMultiplier(
+                acc, output_multiplier[output_channel],
+                output_shift[output_channel]);
+            acc += output_offset;
+            acc = std::max(acc, output_activation_min);
+            acc = std::min(acc, output_activation_max);
+            output_data[Offset(output_shape, batch, out_y, out_x,
+                               output_channel)] = static_cast<int8_t>(acc);
+          }
+        }
+      }
+    }
+  }
+}
+
+inline void DepthwiseConvPerChannel(
+    const DepthwiseParams& params, const int32_t* output_multiplier,
+    const int32_t* output_shift, const RuntimeShape& input_shape,
+    const int16_t* input_data, const RuntimeShape& filter_shape,
+    const int8_t* filter_data, const RuntimeShape& bias_shape,
+    const std::int64_t* bias_data, const RuntimeShape& output_shape,
+    int16_t* output_data) {
+  // Get parameters.
+  const int stride_width = params.stride_width;
+  const int stride_height = params.stride_height;
+  const int dilation_width_factor = params.dilation_width_factor;
+  const int dilation_height_factor = params.dilation_height_factor;
+  const int pad_width = params.padding_values.width;
+  const int pad_height = params.padding_values.height;
+  const int depth_multiplier = params.depth_multiplier;
+  const int32_t output_activation_min = params.quantized_activation_min;
+  const int32_t output_activation_max = params.quantized_activation_max;
+
+  // Check dimensions of the tensors.
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+
+  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
+  const int input_height = input_shape.Dims(1);
+  const int input_width = input_shape.Dims(2);
+  const int input_depth = input_shape.Dims(3);
+  const int filter_height = filter_shape.Dims(1);
+  const int filter_width = filter_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_width = output_shape.Dims(2);
+  TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
+  TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
+
+  for (int batch = 0; batch < batches; ++batch) {
+    for (int out_y = 0; out_y < output_height; ++out_y) {
+      for (int out_x = 0; out_x < output_width; ++out_x) {
+        for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
+          for (int m = 0; m < depth_multiplier; ++m) {
+            const int output_channel = m + in_channel * depth_multiplier;
+            const int in_x_origin = (out_x * stride_width) - pad_width;
+            const int in_y_origin = (out_y * stride_height) - pad_height;
+            std::int64_t acc = 0;
+            for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
+              for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
+                const int in_x = in_x_origin + dilation_width_factor * filter_x;
+                const int in_y =
+                    in_y_origin + dilation_height_factor * filter_y;
+                // Zero padding by omitting the areas outside the image.
+                const bool is_point_inside_image =
+                    (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
+                    (in_y < input_height);
+                if (is_point_inside_image) {
+                  int32_t input_val = input_data[Offset(
+                      input_shape, batch, in_y, in_x, in_channel)];
+                  int32_t filter_val = filter_data[Offset(
+                      filter_shape, 0, filter_y, filter_x, output_channel)];
+                  // Accumulate with a 64-bit accumulator.
+                  // As in the 8-bit case, we assume a maximum of 2^16
+                  // accumulations, so the value in the accumulator should
+                  // not exceed 40 bits.
+                  acc += static_cast<int64_t>(filter_val) *
+                         static_cast<int64_t>(input_val);
+                }
+              }
+            }
+            if (bias_data) {
+              acc += bias_data[output_channel];
+            }
+            int32_t scaled_acc = MultiplyByQuantizedMultiplier(
+                acc, output_multiplier[output_channel],
+                output_shift[output_channel]);
+            scaled_acc = std::max(scaled_acc, output_activation_min);
+            scaled_acc = std::min(scaled_acc, output_activation_max);
+            output_data[Offset(output_shape, batch, out_y, out_x,
+                               output_channel)] =
+                static_cast<int16_t>(scaled_acc);
+          }
+        }
+      }
+    }
+  }
+}
+
+inline void DepthwiseConvHybridPerChannel(
+    const DepthwiseParams& params, float* scaling_factors_ptr,
+    const RuntimeShape& input_shape, const int8_t* input_data,
+    const RuntimeShape& filter_shape, const int8_t* filter_data,
+    const RuntimeShape& bias_shape, const float* bias_data,
+    const RuntimeShape& output_shape, float* output_data,
+    const float* per_channel_scale, int32_t* input_offset) {
+  const int stride_width = params.stride_width;
+  const int stride_height = params.stride_height;
+  const int dilation_width_factor = params.dilation_width_factor;
+  const int dilation_height_factor = params.dilation_height_factor;
+  const int pad_width = params.padding_values.width;
+  const int pad_height = params.padding_values.height;
+  const int depth_multiplier = params.depth_multiplier;
+  const float output_activation_min = params.float_activation_min;
+  const float output_activation_max = params.float_activation_max;
+  // Check dimensions of the tensors.
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
+  const int input_height = input_shape.Dims(1);
+  const int input_width = input_shape.Dims(2);
+  const int input_depth = input_shape.Dims(3);
+  const int filter_height = filter_shape.Dims(1);
+  const int filter_width = filter_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_width = output_shape.Dims(2);
+  const int bias_depth = bias_shape.FlatSize();
+  TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
+  TFLITE_DCHECK_EQ(bias_depth, output_depth);
+
+  for (int batch = 0; batch < batches; ++batch) {
+    for (int out_y = 0; out_y < output_height; ++out_y) {
+      for (int out_x = 0; out_x < output_width; ++out_x) {
+        for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
+          for (int m = 0; m < depth_multiplier; ++m) {
+            const int output_channel = m + in_channel * depth_multiplier;
+            const int in_x_origin = (out_x * stride_width) - pad_width;
+            const int in_y_origin = (out_y * stride_height) - pad_height;
+            int32_t acc = 0;
+            for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
+              for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
+                const int in_x = in_x_origin + dilation_width_factor * filter_x;
+                const int in_y =
+                    in_y_origin + dilation_height_factor * filter_y;
+                // Zero padding by omitting the areas outside the image.
+                const bool is_point_inside_image =
+                    (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
+                    (in_y < input_height);
+                if (is_point_inside_image) {
+                  int32_t input_val = input_data[Offset(
+                      input_shape, batch, in_y, in_x, in_channel)];
+                  int32_t filter_val = filter_data[Offset(
+                      filter_shape, 0, filter_y, filter_x, output_channel)];
+                  acc += filter_val * (input_val - input_offset[batch]);
+                }
+              }
+            }
+            float acc_float = static_cast<float>(acc);
+            acc_float *=
+                per_channel_scale[output_channel] * scaling_factors_ptr[batch];
+            if (bias_data && output_channel < bias_depth) {
+              acc_float += bias_data[output_channel];
+            }
+            output_data[Offset(output_shape, batch, out_y, out_x,
+                               output_channel)] =
+                ActivationFunctionWithMinMax(acc_float, output_activation_min,
+                                             output_activation_max);
+          }
+        }
+      }
+    }
+  }
+}
+
+}  // namespace reference_integer_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_
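
All three kernels above share the channel mapping output_channel = m + in_channel * depth_multiplier, i.e. each input channel feeds depth_multiplier consecutive output channels. A tiny sketch with made-up shapes:

#include <iostream>

int main() {
  const int input_depth = 3, depth_multiplier = 2;  // Hypothetical shapes.
  for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
    for (int m = 0; m < depth_multiplier; ++m) {
      const int output_channel = m + in_channel * depth_multiplier;
      std::cout << "in " << in_channel << " -> out " << output_channel << "\n";
    }
  }
  // Prints: in 0 -> out 0, in 0 -> out 1, in 1 -> out 2, ..., in 2 -> out 5.
}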

+ 108 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h

@@ -0,0 +1,108 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_
+
+#include "tensorflow/lite/kernels/internal/common.h"
+
+namespace tflite {
+namespace reference_integer_ops {
+
+inline void FullyConnected(
+    const FullyConnectedParams& params, const RuntimeShape& input_shape,
+    const int8_t* input_data, const RuntimeShape& filter_shape,
+    const int8_t* filter_data, const RuntimeShape& bias_shape,
+    const int32_t* bias_data, const RuntimeShape& output_shape,
+    int8_t* output_data) {
+  const int32_t input_offset = params.input_offset;
+  const int32_t filter_offset = params.weights_offset;
+  const int32_t output_offset = params.output_offset;
+  const int32_t output_multiplier = params.output_multiplier;
+  const int output_shift = params.output_shift;
+  const int32_t output_activation_min = params.quantized_activation_min;
+  const int32_t output_activation_max = params.quantized_activation_max;
+  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2);
+
+  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+  const int filter_dim_count = filter_shape.DimensionsCount();
+  const int batches = output_shape.Dims(0);
+  const int output_depth = output_shape.Dims(1);
+  TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
+  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
+  for (int b = 0; b < batches; ++b) {
+    for (int out_c = 0; out_c < output_depth; ++out_c) {
+      int32_t acc = 0;
+      for (int d = 0; d < accum_depth; ++d) {
+        int32_t input_val = input_data[b * accum_depth + d];
+        int32_t filter_val = filter_data[out_c * accum_depth + d];
+        acc += (filter_val + filter_offset) * (input_val + input_offset);
+      }
+      if (bias_data) {
+        acc += bias_data[out_c];
+      }
+      acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+      acc += output_offset;
+      acc = std::max(acc, output_activation_min);
+      acc = std::min(acc, output_activation_max);
+      output_data[out_c + output_depth * b] = static_cast<int8_t>(acc);
+    }
+  }
+}
+
+inline void FullyConnected(
+    const FullyConnectedParams& params, const RuntimeShape& input_shape,
+    const int16_t* input_data, const RuntimeShape& filter_shape,
+    const int8_t* filter_data, const RuntimeShape& bias_shape,
+    const int64_t* bias_data, const RuntimeShape& output_shape,
+    int16_t* output_data) {
+  const int32_t filter_offset = params.weights_offset;
+  const int32_t output_multiplier = params.output_multiplier;
+  const int output_shift = params.output_shift;
+  const int32_t output_activation_min = params.quantized_activation_min;
+  const int32_t output_activation_max = params.quantized_activation_max;
+  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2);
+
+  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+  const int filter_dim_count = filter_shape.DimensionsCount();
+  const int batches = output_shape.Dims(0);
+  const int output_depth = output_shape.Dims(1);
+  TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
+  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
+  for (int b = 0; b < batches; ++b) {
+    for (int out_c = 0; out_c < output_depth; ++out_c) {
+      int64_t acc = 0;
+      for (int d = 0; d < accum_depth; ++d) {
+        int32_t input_val = input_data[b * accum_depth + d];
+        int32_t filter_val = filter_data[out_c * accum_depth + d];
+        acc += (filter_val + filter_offset) * input_val;
+      }
+      if (bias_data) {
+        acc += bias_data[out_c];
+      }
+      int32_t acc_scaled =
+          MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+      acc_scaled = std::max(acc_scaled, output_activation_min);
+      acc_scaled = std::min(acc_scaled, output_activation_max);
+      output_data[out_c + output_depth * b] = static_cast<int16_t>(acc_scaled);
+    }
+  }
+}
+
+}  // namespace reference_integer_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_
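
Both overloads index the filter as a row-major [output_depth, accum_depth] matrix, so every output element is the dot product of one input row with one filter row. A float sketch of the same loop structure, with made-up shapes and values:

#include <iostream>

int main() {
  const int batches = 1, accum_depth = 3, output_depth = 2;
  const float input[batches * accum_depth] = {1, 2, 3};
  const float filter[output_depth * accum_depth] = {1, 0, 0,   // filter row 0
                                                    0, 1, 1};  // filter row 1
  const float bias[output_depth] = {0.5f, -0.5f};
  float output[batches * output_depth];
  for (int b = 0; b < batches; ++b) {
    for (int out_c = 0; out_c < output_depth; ++out_c) {
      float acc = 0;
      for (int d = 0; d < accum_depth; ++d) {
        acc += filter[out_c * accum_depth + d] * input[b * accum_depth + d];
      }
      output[out_c + output_depth * b] = acc + bias[out_c];
    }
  }
  std::cout << output[0] << " " << output[1] << "\n";  // Prints 1.5 4.5.
}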

+ 65 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/integer_ops/l2normalization.h

@@ -0,0 +1,65 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_L2NORMALIZATION_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_L2NORMALIZATION_H_
+
+#include "tensorflow/lite/kernels/internal/common.h"
+
+namespace tflite {
+namespace reference_integer_ops {
+
+inline void L2Normalization(int32_t input_zero_point, int32_t outer_size,
+                            int32_t depth, const int8_t* input_data,
+                            int8_t* output_data) {
+  static constexpr int8_t kMinInt8 = std::numeric_limits<int8_t>::min();
+  static constexpr int8_t kMaxInt8 = std::numeric_limits<int8_t>::max();
+  // The output scale must be in sync with Prepare().
+  // Output is in 1/128 scale so the actual output range is nudged from [-1, 1]
+  // to [-1, 127/128].
+  static constexpr int32_t kOutputScale = 7;
+  for (int outer_index = 0; outer_index < outer_size; ++outer_index) {
+    // int32_t = (int8_t - int8_t) ^ 2.
+    // ([-128, 127] - [-128, 127]) ^ 2 = [0, (2^8 - 1)^2], so the accumulator
+    // is safe from overflowing for at least 2^15 steps.
+    int32_t acc = 0;
+    for (int inner_index = 0; inner_index < depth; ++inner_index) {
+      int32_t input =
+          input_data[depth * outer_index + inner_index] - input_zero_point;
+      acc += input * input;
+    }
+    int32_t inv_l2norm_multiplier;
+    int inv_l2norm_shift;
+    GetInvSqrtQuantizedMultiplierExp(acc, kReverseShift, &inv_l2norm_multiplier,
+                                     &inv_l2norm_shift);
+
+    for (int inner_index = 0; inner_index < depth; ++inner_index) {
+      int32_t input =
+          input_data[depth * outer_index + inner_index] - input_zero_point;
+
+      // Rescale and downcast. Rescale is folded into the division.
+      int32_t output_in_q24 = MultiplyByQuantizedMultiplier(
+          input, inv_l2norm_multiplier, inv_l2norm_shift + kOutputScale);
+      output_in_q24 =
+          std::min(static_cast<int32_t>(kMaxInt8),
+                   std::max(static_cast<int32_t>(kMinInt8), output_in_q24));
+      output_data[depth * outer_index + inner_index] =
+          static_cast<int8_t>(output_in_q24);
+    }
+  }
+}
+}  // namespace reference_integer_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_L2NORMALIZATION_H_
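
A quick numeric check of the intended arithmetic: with kOutputScale = 7 the output unit is 1/128, so for an offset-corrected input vector (3, 4), whose L2 norm is 5, the int8 outputs should land near round(128 * 3/5) = 77 and round(128 * 4/5) = 102. A float sketch of that expectation:

#include <cmath>
#include <iostream>

int main() {
  const int depth = 2;
  const int input[depth] = {3, 4};  // Input with zero point already removed.
  const double norm = std::sqrt(3.0 * 3 + 4.0 * 4);  // 5.
  for (int i = 0; i < depth; ++i) {
    // 1/128 output scale: multiply the normalized value by 2^7.
    std::cout << std::lround(128.0 * input[i] / norm) << "\n";  // 77, 102.
  }
}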

+ 99 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h

@@ -0,0 +1,99 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_
+
+#include <limits>
+#include "tensorflow/lite/kernels/internal/common.h"
+
+namespace tflite {
+namespace reference_integer_ops {
+
+inline void Logistic(int32_t input_zero_point, int32_t input_range_radius,
+                     int32_t input_multiplier, int32_t input_left_shift,
+                     int32_t input_size, const int8_t* input_data,
+                     int8_t* output_data) {
+  // Integer bits must be in sync with the Prepare() function.
+  static constexpr int32_t kInputIntegerBits = 4;
+  static constexpr int32_t kOutputIntegerBits = 8;
+  static constexpr int8_t kMinInt8 = std::numeric_limits<int8_t>::min();
+  static constexpr int8_t kMaxInt8 = std::numeric_limits<int8_t>::max();
+  static constexpr int32_t kOutputZeroPoint = -128;
+
+  for (int i = 0; i < input_size; ++i) {
+    const int32_t input =
+        static_cast<int32_t>(input_data[i]) - input_zero_point;
+    if (input <= -input_range_radius) {
+      output_data[i] = kMinInt8;
+    } else if (input >= input_range_radius) {
+      output_data[i] = kMaxInt8;
+    } else {
+      const int32_t input_in_q4 = MultiplyByQuantizedMultiplier(
+          input, input_multiplier, input_left_shift);
+      using FixedPoint4 = gemmlowp::FixedPoint<int32_t, kInputIntegerBits>;
+      const int32_t output_in_q0 =
+          gemmlowp::logistic(FixedPoint4::FromRaw(input_in_q4)).raw();
+
+      // Rescale and downcast.
+      using gemmlowp::RoundingDivideByPOT;
+      int32_t output_in_q23 =
+          RoundingDivideByPOT(output_in_q0, 31 - kOutputIntegerBits);
+      output_in_q23 = std::min(std::max(output_in_q23 + kOutputZeroPoint,
+                                        static_cast<int32_t>(kMinInt8)),
+                               static_cast<int32_t>(kMaxInt8));
+      output_data[i] = static_cast<int8_t>(output_in_q23);
+    }
+  }
+}
+
+inline void Logistic(int32_t input_multiplier, int32_t input_size,
+                     const int16_t* ptr_input_data, int16_t* ptr_output_data) {
+  // We use the sigmoid LUT, taking into account that
+  // tanh(x) = 2 * sigmoid(2 * x) - 1.
+
+  int32_t input_data_mul = (input_multiplier > 0) ? input_multiplier : 1;
+
+  for (int i = 0; i < input_size; ++i, ptr_input_data++, ptr_output_data++) {
+    int32_t input_data = (*ptr_input_data) * input_data_mul;
+
+    // Scale by 3/4 to expand the range [-8, 8] -> [-10.7, 10.7]; the
+    // interpolation is done on unsigned values.
+    uint32_t abs_input_data = 3 * abs(input_data);
+
+    // We divide by 2^9: 2^7 for the input conversion, plus 2^2 so that the
+    // multiplication by 3 above yields the intended scale of 3/4.
+    uint8_t uh = abs_input_data >> 9;
+    uint32_t ua = sigmoid_table_uint16[uh];
+    uint32_t ub = sigmoid_table_uint16[uh + 1];
+    uint32_t ut = abs_input_data & 0x1ff;
+
+    // Interpolation is done using the fractional bits.
+    uint32_t result = (ua << 9) + ut * (ub - ua);
+
+    result = (input_data >= 0) ? (result + (1 << 9))
+                               : ((1 << (16 + 9)) - result + (1 << 9) - 1);
+
+    // Back to 16-bit.
+    result >>= 10;
+
+    *ptr_output_data = result;
+  }
+}
+
+}  // namespace reference_integer_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_
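
The 16-bit path splits the scaled input into a table index (high bits) and a 9-bit fraction, then interpolates linearly between two neighboring table entries. A standalone sketch of just that interpolation step, using a made-up pair of entries instead of the real sigmoid_table_uint16:

#include <cstdint>
#include <iostream>

int main() {
  // Hypothetical neighboring entries ua = table[uh] and ub = table[uh + 1].
  const uint32_t ua = 40000, ub = 40512;
  const uint32_t ut = 256;  // 9-bit fractional part: 256/512 = exactly 0.5.
  // (ua << 9) + ut * (ub - ua) interpolates between ua and ub while keeping
  // 9 fractional bits of precision.
  const uint32_t result = (ua << 9) + ut * (ub - ua);
  std::cout << (result >> 9) << "\n";  // Prints 40256 = (ua + ub) / 2.
}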

+ 131 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h

@@ -0,0 +1,131 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MUL_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MUL_H_
+
+#include "fixedpoint/fixedpoint.h"
+#include "ruy/profiler/instrumentation.h"  // from @ruy
+#include "tensorflow/lite/kernels/internal/common.h"
+
+namespace tflite {
+namespace reference_integer_ops {
+
+template <typename T>
+inline void MulElementwise(int size, const ArithmeticParams& params,
+                           const T* input1_data, const T* input2_data,
+                           T* output_data) {
+  for (int i = 0; i < size; ++i) {
+    const int32_t input1_val = params.input1_offset + input1_data[i];
+    const int32_t input2_val = params.input2_offset + input2_data[i];
+    const int32_t unclamped_result =
+        params.output_offset +
+        MultiplyByQuantizedMultiplier(input1_val * input2_val,
+                                      params.output_multiplier,
+                                      params.output_shift);
+    const int32_t clamped_output =
+        std::min(params.quantized_activation_max,
+                 std::max(params.quantized_activation_min, unclamped_result));
+    output_data[i] = static_cast<T>(clamped_output);
+  }
+}
+
+template <typename T>
+inline void Mul(const ArithmeticParams& params,
+                const RuntimeShape& input1_shape, const T* input1_data,
+                const RuntimeShape& input2_shape, const T* input2_data,
+                const RuntimeShape& output_shape, T* output_data) {
+  TFLITE_DCHECK_LE(params.quantized_activation_min,
+                   params.quantized_activation_max);
+  ruy::profiler::ScopeLabel label("Mul/8bit");
+  const int flat_size =
+      MatchingElementsSize(input1_shape, input2_shape, output_shape);
+
+  MulElementwise(flat_size, params, input1_data, input2_data, output_data);
+}
+
+// Mul with 16 bit inputs and int8_t outputs.
+inline void Mul(const ArithmeticParams& params,
+                const RuntimeShape& input1_shape, const int16_t* input1_data,
+                const RuntimeShape& input2_shape, const int16_t* input2_data,
+                const RuntimeShape& output_shape, int8_t* output_data) {
+  ruy::profiler::ScopeLabel label("Mul/Int16Int8");
+  int32_t output_offset = params.output_offset;
+  int32_t output_activation_min = params.quantized_activation_min;
+  int32_t output_activation_max = params.quantized_activation_max;
+  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+
+  const int flat_size =
+      MatchingElementsSize(input1_shape, input2_shape, output_shape);
+
+  for (int i = 0; i < flat_size; i++) {
+    // F0 uses 0 integer bits, range [-1, 1].
+    using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+
+    F0 unclamped_result =
+        F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]);
+    int16_t rescaled_result =
+        gemmlowp::RoundingDivideByPOT(unclamped_result.raw(), 8);
+    int16_t clamped_result = std::min<int16_t>(
+        output_activation_max - output_offset, rescaled_result);
+    clamped_result = std::max<int16_t>(output_activation_min - output_offset,
+                                       clamped_result);
+    output_data[i] = output_offset + clamped_result;
+  }
+}
+
+template <typename T>
+inline void BroadcastMul4DSlow(
+    const ArithmeticParams& params, const RuntimeShape& input1_shape,
+    const T* input1_data, const RuntimeShape& input2_shape,
+    const T* input2_data, const RuntimeShape& output_shape, T* output_data) {
+  ruy::profiler::ScopeLabel label("BroadcastMul4DSlow");
+
+  NdArrayDesc<4> desc1;
+  NdArrayDesc<4> desc2;
+  // The input shapes are extended as part of NdArrayDesc initialization.
+  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
+                                      &desc2);
+  const RuntimeShape extended_output_shape =
+      RuntimeShape::ExtendedShape(4, output_shape);
+
+  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
+    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
+      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
+        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
+          const int32_t input1_val =
+              params.input1_offset +
+              input1_data[SubscriptToIndex(desc1, b, y, x, c)];
+          const int32_t input2_val =
+              params.input2_offset +
+              input2_data[SubscriptToIndex(desc2, b, y, x, c)];
+          const int32_t unclamped_result =
+              params.output_offset +
+              MultiplyByQuantizedMultiplier(input1_val * input2_val,
+                                            params.output_multiplier,
+                                            params.output_shift);
+          const int32_t clamped_output = std::min(
+              params.quantized_activation_max,
+              std::max(params.quantized_activation_min, unclamped_result));
+          output_data[Offset(extended_output_shape, b, y, x, c)] =
+              static_cast<T>(clamped_output);
+        }
+      }
+    }
+  }
+}
+
+}  // namespace reference_integer_ops
+}  // namespace tflite
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MUL_H_
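
In the 16-bit-to-8-bit path above, both inputs are Q0.15 fixed-point values, their product is taken with a rounding doubling high multiply, and RoundingDivideByPOT(x, 8) maps the Q0.15 result into int8 range. A standalone numeric sketch with a simplified local stand-in for the gemmlowp multiply:

#include <cstdint>
#include <iostream>

// Simplified stand-in for the gemmlowp F0 multiply: (2 * a * b + 2^15) >> 16.
int16_t DoublingHighMulSketch(int16_t a, int16_t b) {
  const int64_t product = 2LL * a * b + (1LL << 15);
  return static_cast<int16_t>(product >> 16);
}

int main() {
  const int16_t half = 1 << 14;  // 0.5 in Q0.15.
  // 0.5 * 0.5 = 0.25, which is 8192 in Q0.15.
  const int16_t q15_product = DoublingHighMulSketch(half, half);
  // RoundingDivideByPOT(x, 8): divide by 2^8, rounding to nearest.
  const int16_t q8 = (q15_product + (1 << 7)) >> 8;
  std::cout << q15_product << " " << q8 << "\n";  // Prints 8192 32.
}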

+ 258 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h

@@ -0,0 +1,258 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_POOLING_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_POOLING_H_
+
+#include <limits>
+#include "tensorflow/lite/kernels/internal/common.h"
+
+namespace tflite {
+namespace reference_integer_ops {
+
+inline void AveragePool(const PoolParams& params,
+                        const RuntimeShape& input_shape,
+                        const int8_t* input_data,
+                        const RuntimeShape& output_shape, int8_t* output_data) {
+  TFLITE_DCHECK_LE(params.quantized_activation_min,
+                   params.quantized_activation_max);
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
+  const int input_height = input_shape.Dims(1);
+  const int input_width = input_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_width = output_shape.Dims(2);
+  const int stride_height = params.stride_height;
+  const int stride_width = params.stride_width;
+  for (int batch = 0; batch < batches; ++batch) {
+    for (int out_y = 0; out_y < output_height; ++out_y) {
+      for (int out_x = 0; out_x < output_width; ++out_x) {
+        for (int channel = 0; channel < depth; ++channel) {
+          const int in_x_origin =
+              (out_x * stride_width) - params.padding_values.width;
+          const int in_y_origin =
+              (out_y * stride_height) - params.padding_values.height;
+          // Compute the boundaries of the filter region clamped so as to
+          // ensure that the filter window fits in the input array.
+          const int filter_x_start = std::max(0, -in_x_origin);
+          const int filter_x_end =
+              std::min(params.filter_width, input_width - in_x_origin);
+          const int filter_y_start = std::max(0, -in_y_origin);
+          const int filter_y_end =
+              std::min(params.filter_height, input_height - in_y_origin);
+          int32_t acc = 0;
+          int filter_count = 0;
+          for (int filter_y = filter_y_start; filter_y < filter_y_end;
+               ++filter_y) {
+            for (int filter_x = filter_x_start; filter_x < filter_x_end;
+                 ++filter_x) {
+              const int in_x = in_x_origin + filter_x;
+              const int in_y = in_y_origin + filter_y;
+              acc +=
+                  input_data[Offset(input_shape, batch, in_y, in_x, channel)];
+              filter_count++;
+            }
+          }
+          // Round to the closest integer value.
+          acc = acc > 0 ? (acc + filter_count / 2) / filter_count
+                        : (acc - filter_count / 2) / filter_count;
+          acc = std::max(acc, params.quantized_activation_min);
+          acc = std::min(acc, params.quantized_activation_max);
+          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
+              static_cast<int8_t>(acc);
+        }
+      }
+    }
+  }
+}
+
+inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
+                    const int8_t* input_data, const RuntimeShape& output_shape,
+                    int8_t* output_data) {
+  TFLITE_DCHECK_LE(params.quantized_activation_min,
+                   params.quantized_activation_max);
+  TFLITE_DCHECK_GE(params.quantized_activation_min,
+                   std::numeric_limits<int8_t>::min());
+  TFLITE_DCHECK_LE(params.quantized_activation_max,
+                   std::numeric_limits<int8_t>::max());
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
+  const int input_height = input_shape.Dims(1);
+  const int input_width = input_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_width = output_shape.Dims(2);
+  const int stride_height = params.stride_height;
+  const int stride_width = params.stride_width;
+  for (int batch = 0; batch < batches; ++batch) {
+    for (int out_y = 0; out_y < output_height; ++out_y) {
+      for (int out_x = 0; out_x < output_width; ++out_x) {
+        for (int channel = 0; channel < depth; ++channel) {
+          const int in_x_origin =
+              (out_x * stride_width) - params.padding_values.width;
+          const int in_y_origin =
+              (out_y * stride_height) - params.padding_values.height;
+          // Compute the boundaries of the filter region clamped so as to
+          // ensure that the filter window fits in the input array.
+          const int filter_x_start = std::max(0, -in_x_origin);
+          const int filter_x_end =
+              std::min(params.filter_width, input_width - in_x_origin);
+          const int filter_y_start = std::max(0, -in_y_origin);
+          const int filter_y_end =
+              std::min(params.filter_height, input_height - in_y_origin);
+          int8_t max = std::numeric_limits<int8_t>::lowest();
+          for (int filter_y = filter_y_start; filter_y < filter_y_end;
+               ++filter_y) {
+            for (int filter_x = filter_x_start; filter_x < filter_x_end;
+                 ++filter_x) {
+              const int in_x = in_x_origin + filter_x;
+              const int in_y = in_y_origin + filter_y;
+              max = std::max(
+                  max,
+                  input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
+            }
+          }
+          max = std::max<int8_t>(max, params.quantized_activation_min);
+          max = std::min<int8_t>(max, params.quantized_activation_max);
+          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
+              static_cast<int8_t>(max);
+        }
+      }
+    }
+  }
+}
+
+inline void AveragePool(const PoolParams& params,
+                        const RuntimeShape& input_shape,
+                        const int16_t* input_data,
+                        const RuntimeShape& output_shape,
+                        int16_t* output_data) {
+  TFLITE_DCHECK_LE(params.quantized_activation_min,
+                   params.quantized_activation_max);
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
+  const int input_height = input_shape.Dims(1);
+  const int input_width = input_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_width = output_shape.Dims(2);
+  const int stride_height = params.stride_height;
+  const int stride_width = params.stride_width;
+  for (int batch = 0; batch < batches; ++batch) {
+    for (int out_y = 0; out_y < output_height; ++out_y) {
+      for (int out_x = 0; out_x < output_width; ++out_x) {
+        for (int channel = 0; channel < depth; ++channel) {
+          const int in_x_origin =
+              (out_x * stride_width) - params.padding_values.width;
+          const int in_y_origin =
+              (out_y * stride_height) - params.padding_values.height;
+          // Compute the boundaries of the filter region clamped so as to
+          // ensure that the filter window fits in the input array.
+          const int filter_x_start = std::max(0, -in_x_origin);
+          const int filter_x_end =
+              std::min(params.filter_width, input_width - in_x_origin);
+          const int filter_y_start = std::max(0, -in_y_origin);
+          const int filter_y_end =
+              std::min(params.filter_height, input_height - in_y_origin);
+          int32_t acc = 0;
+          int filter_count = 0;
+          for (int filter_y = filter_y_start; filter_y < filter_y_end;
+               ++filter_y) {
+            for (int filter_x = filter_x_start; filter_x < filter_x_end;
+                 ++filter_x) {
+              const int in_x = in_x_origin + filter_x;
+              const int in_y = in_y_origin + filter_y;
+              acc +=
+                  input_data[Offset(input_shape, batch, in_y, in_x, channel)];
+              filter_count++;
+            }
+          }
+          // Round to the nearest integer, rounding half away from zero.
+          acc = acc > 0 ? (acc + filter_count / 2) / filter_count
+                        : (acc - filter_count / 2) / filter_count;
+          acc = std::max(acc, params.quantized_activation_min);
+          acc = std::min(acc, params.quantized_activation_max);
+          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
+              static_cast<int16_t>(acc);
+        }
+      }
+    }
+  }
+}
+
+inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
+                    const int16_t* input_data, const RuntimeShape& output_shape,
+                    int16_t* output_data) {
+  TFLITE_DCHECK_LE(params.quantized_activation_min,
+                   params.quantized_activation_max);
+  TFLITE_DCHECK_GE(params.quantized_activation_min,
+                   std::numeric_limits<int16_t>::min());
+  TFLITE_DCHECK_LE(params.quantized_activation_max,
+                   std::numeric_limits<int16_t>::max());
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
+  const int input_height = input_shape.Dims(1);
+  const int input_width = input_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_width = output_shape.Dims(2);
+  const int stride_height = params.stride_height;
+  const int stride_width = params.stride_width;
+  for (int batch = 0; batch < batches; ++batch) {
+    for (int out_y = 0; out_y < output_height; ++out_y) {
+      for (int out_x = 0; out_x < output_width; ++out_x) {
+        for (int channel = 0; channel < depth; ++channel) {
+          const int in_x_origin =
+              (out_x * stride_width) - params.padding_values.width;
+          const int in_y_origin =
+              (out_y * stride_height) - params.padding_values.height;
+          // Compute the boundaries of the filter region clamped so as to
+          // ensure that the filter window fits in the input array.
+          const int filter_x_start = std::max(0, -in_x_origin);
+          const int filter_x_end =
+              std::min(params.filter_width, input_width - in_x_origin);
+          const int filter_y_start = std::max(0, -in_y_origin);
+          const int filter_y_end =
+              std::min(params.filter_height, input_height - in_y_origin);
+          int16_t max = std::numeric_limits<int16_t>::lowest();
+          for (int filter_y = filter_y_start; filter_y < filter_y_end;
+               ++filter_y) {
+            for (int filter_x = filter_x_start; filter_x < filter_x_end;
+                 ++filter_x) {
+              const int in_x = in_x_origin + filter_x;
+              const int in_y = in_y_origin + filter_y;
+              max = std::max(
+                  max,
+                  input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
+            }
+          }
+          max = std::max<int16_t>(max, params.quantized_activation_min);
+          max = std::min<int16_t>(max, params.quantized_activation_max);
+          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
+              static_cast<int16_t>(max);
+        }
+      }
+    }
+  }
+}
+
+}  // namespace reference_integer_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_POOLING_H_
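
A minimal usage sketch for the int8_t AveragePool above. The 1x4x4x1 shape, the data values, and the wrapper function are illustrative assumptions, not part of this change:

#include <cstdint>
#include <limits>

#include "tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h"
#include "tensorflow/lite/kernels/internal/types.h"

// Average-pools a 1x4x4x1 int8 tensor with a 2x2 window and stride 2.
void AveragePoolExample() {
  const tflite::RuntimeShape input_shape({1, 4, 4, 1});
  const tflite::RuntimeShape output_shape({1, 2, 2, 1});
  const int8_t input_data[16] = {1, 2,  3,  4,  5,  6,  7,  8,
                                 9, 10, 11, 12, 13, 14, 15, 16};
  int8_t output_data[4];

  tflite::PoolParams params{};
  params.stride_height = 2;
  params.stride_width = 2;
  params.filter_height = 2;
  params.filter_width = 2;
  params.padding_values.height = 0;  // "VALID"-style padding.
  params.padding_values.width = 0;
  params.quantized_activation_min = std::numeric_limits<int8_t>::min();
  params.quantized_activation_max = std::numeric_limits<int8_t>::max();

  tflite::reference_integer_ops::AveragePool(params, input_shape, input_data,
                                             output_shape, output_data);
  // output_data is now {4, 6, 12, 14}: each 2x2 window average, rounded.
}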

+ 106 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h

@@ -0,0 +1,106 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_
+
+#include <cstdlib>
+#include <limits>
+
+#include "fixedpoint/fixedpoint.h"
+#include "tensorflow/lite/kernels/internal/common.h"
+
+namespace tflite {
+namespace reference_integer_ops {
+
+inline void Tanh(int32_t input_zero_point, int32_t input_range_radius,
+                 int32_t input_multiplier, int32_t input_shift,
+                 int32_t input_size, const int8_t* input_data,
+                 int8_t* output_data) {
+  // Integer bits must be kept in sync with the Prepare() function.
+  static constexpr int32_t kInputIntegerBits = 4;
+  static constexpr int32_t kOutputScale = 7;
+  static constexpr int32_t kMinInt8 = std::numeric_limits<int8_t>::min();
+  static constexpr int32_t kMaxInt8 = std::numeric_limits<int8_t>::max();
+  using F4 = gemmlowp::FixedPoint<int32_t, kInputIntegerBits>;
+
+  for (int i = 0; i < input_size; ++i) {
+    const int32_t input =
+        static_cast<int32_t>(input_data[i]) - input_zero_point;
+    if (input <= -input_range_radius) {
+      output_data[i] = kMinInt8;
+    } else if (input >= input_range_radius) {
+      output_data[i] = kMaxInt8;
+    } else {
+      const int32_t input_in_q4 =
+          MultiplyByQuantizedMultiplier(input, input_multiplier, input_shift);
+      const int32_t output_in_q0 =
+          gemmlowp::tanh(F4::FromRaw(input_in_q4)).raw();
+
+      // Rescale and downcast.
+      using gemmlowp::RoundingDivideByPOT;
+      int32_t output_in_q24 =
+          RoundingDivideByPOT(output_in_q0, 31 - kOutputScale);
+      output_in_q24 = std::min(std::max(output_in_q24, kMinInt8), kMaxInt8);
+      output_data[i] = static_cast<int8_t>(output_in_q24);
+    }
+  }
+}
+
+inline void Tanh(int32_t input_multiplier, int32_t input_left_shift,
+                 int32_t input_size, const int16_t* ptr_input_data,
+                 int16_t* ptr_output_data) {
+  // We reuse the sigmoid LUT and exploit the identity
+  // tanh(x) = 2*sigmoid(2*x) - 1.
+
+  int32_t input_data_mul = (input_multiplier > 0) ? input_multiplier : 1;
+
+  for (int i = 0; i < input_size; ++i, ptr_input_data++, ptr_output_data++) {
+    int32_t input_data = (*ptr_input_data) * input_data_mul;
+
+    if (input_left_shift == 1) {
+      input_data <<= 1;
+    }
+
+    // Scale by 3/4 to expand range [-8,8]->[-10.7,10.7].
+    uint32_t abs_input_data = 3 * abs(input_data);
+    uint32_t uh = abs_input_data >> 8;
+    int32_t result;
+
+    if (uh >= 255) {
+      // Saturate to maximum.
+      result = 0xFFFF << 8;
+    } else {
+      uint32_t ua = sigmoid_table_uint16[uh];
+      uint32_t ub = sigmoid_table_uint16[uh + 1];
+
+      uint8_t ut = abs_input_data & 0xFF;
+
+      result = (ua << 8) + ut * (ub - ua);
+    }
+
+    result = (input_data >= 0)
+                 ? (result - (1 << (14 + 9)) + (1 << (9 - 2)))
+                 : (-result + (1 << (14 + 9)) + (1 << (9 - 2)) - 1);
+
+    // Convert back to 16-bit.
+    result >>= (9 - 1);
+
+    *ptr_output_data = result;
+  }
+}
+
+}  // namespace reference_integer_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_
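
For intuition, the int16_t path above is a fixed-point realization of the sigmoid identity; a plain-float sketch of what it approximates (the kernel's Q-format constants absorb the scaling that the floats hide):

#include <cmath>

// tanh(x) = 2*sigmoid(2*x) - 1, the identity behind the int16_t LUT path.
float TanhViaSigmoid(float x) {
  const float sigmoid_2x = 1.0f / (1.0f + std::exp(-2.0f * x));
  return 2.0f * sigmoid_2x - 1.0f;
}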

+ 90 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/l2normalization.h

@@ -0,0 +1,90 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_
+
+#include <algorithm>
+#include <cmath>
+
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+
+namespace reference_ops {
+
+inline void L2Normalization(const tflite::L2NormalizationParams& op_params,
+                            const RuntimeShape& input_shape,
+                            const float* input_data,
+                            const RuntimeShape& output_shape,
+                            float* output_data, float epsilon = 1e-6) {
+  const int trailing_dim = input_shape.DimensionsCount() - 1;
+  const int outer_size =
+      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
+  const int depth =
+      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
+  for (int i = 0; i < outer_size; ++i) {
+    float squared_l2_norm = 0;
+    for (int c = 0; c < depth; ++c) {
+      const float val = input_data[depth * i + c];
+      squared_l2_norm += val * val;
+    }
+    float l2_norm = std::sqrt(squared_l2_norm);
+    l2_norm = std::max(l2_norm, epsilon);
+    for (int c = 0; c < depth; ++c) {
+      output_data[depth * i + c] = input_data[depth * i + c] / l2_norm;
+    }
+  }
+}
+
+inline void L2Normalization(const tflite::L2NormalizationParams& op_params,
+                            const RuntimeShape& input_shape,
+                            const uint8_t* input_data,
+                            const RuntimeShape& output_shape,
+                            uint8_t* output_data) {
+  const int trailing_dim = input_shape.DimensionsCount() - 1;
+  const int depth =
+      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
+  const int outer_size =
+      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
+  const int32_t input_zero_point = op_params.input_zero_point;
+
+  for (int i = 0; i < outer_size; ++i) {
+    int32_t square_l2_norm = 0;
+    for (int c = 0; c < depth; c++) {
+      int32_t diff = input_data[depth * i + c] - input_zero_point;
+      square_l2_norm += diff * diff;
+    }
+    int32_t inv_l2norm_multiplier;
+    int inv_l2norm_shift;
+    GetInvSqrtQuantizedMultiplierExp(square_l2_norm, kReverseShift,
+                                     &inv_l2norm_multiplier, &inv_l2norm_shift);
+    for (int c = 0; c < depth; c++) {
+      int32_t diff = input_data[depth * i + c] - input_zero_point;
+      int32_t rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOneExp(
+          128 * diff, inv_l2norm_multiplier, inv_l2norm_shift);
+      int32_t unclamped_output_val = 128 + rescaled_diff;
+      int32_t output_val =
+          std::min(static_cast<int32_t>(255),
+                   std::max(static_cast<int32_t>(0), unclamped_output_val));
+      output_data[depth * i + c] = static_cast<uint8_t>(output_val);
+    }
+  }
+}
+
+}  // namespace reference_ops
+}  // namespace tflite
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_
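
A small float usage sketch for L2Normalization; the 1x1x1x3 shape and the values are made up for illustration:

#include "tensorflow/lite/kernels/internal/reference/l2normalization.h"

void L2NormalizationExample() {
  const tflite::RuntimeShape shape({1, 1, 1, 3});
  const float input[3] = {3.0f, 0.0f, 4.0f};  // L2 norm = 5.
  float output[3];
  tflite::L2NormalizationParams op_params{};  // Unused by the float path.
  tflite::reference_ops::L2Normalization(op_params, shape, input, shape,
                                         output);
  // output is now {0.6f, 0.0f, 0.8f}.
}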

+ 132 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/logistic.h

@@ -0,0 +1,132 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOGISTIC_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOGISTIC_H_
+
+#include <cmath>
+
+#include "fixedpoint/fixedpoint.h"
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/cppmath.h"
+#include "tensorflow/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+#include "tensorflow/lite/kernels/op_macros.h"
+
+namespace tflite {
+namespace reference_ops {
+
+inline void Logistic(const RuntimeShape& input_shape, const float* input_data,
+                     const RuntimeShape& output_shape, float* output_data) {
+  const float cutoff_upper = 16.619047164916992188f;
+  const float cutoff_lower = -9.f;
+
+  const int flat_size = MatchingFlatSize(input_shape, output_shape);
+
+  // Rationale for using an approximation in the reference kernel:
+  // 0. The approximation gives enough precision for float.
+  // 1. It works around an issue on an embedded chipset where exp() does not
+  // behave as expected: on overflow, exp(x) should return inf (IEEE 754
+  // defines a representation for inf), not 1.701417.
+  // 2. It speeds up the calculation and matches the behavior of the
+  // optimized kernels (see the definition of scalar_logistic_op<float>).
+
+  for (int i = 0; i < flat_size; i++) {
+    float val = input_data[i];
+    float result;
+    if (val > cutoff_upper) {
+      result = 1.0f;
+    } else if (val < cutoff_lower) {
+      result = std::exp(val);
+    } else {
+      result = 1.f / (1.f + std::exp(-val));
+    }
+    output_data[i] = result;
+  }
+}
+
+// Convenience version that allows, for example, generated-code calls to be
+// uniform between data types.
+inline void Logistic(const LogisticParams&, const RuntimeShape& input_shape,
+                     const float* input_data, const RuntimeShape& output_shape,
+                     float* output_data) {
+  // Drop params: not needed.
+  Logistic(input_shape, input_data, output_shape, output_data);
+}
+
+inline void Logistic(const LogisticParams& params,
+                     const RuntimeShape& input_shape, const int16_t* input_data,
+                     const RuntimeShape& output_shape, int16_t* output_data) {
+  const int flat_size = MatchingFlatSize(input_shape, output_shape);
+
+  for (int i = 0; i < flat_size; i++) {
+    // F0 uses 0 integer bits, range [-1, 1].
+    // This is the return type of math functions such as tanh, logistic,
+    // whose range is in [-1, 1].
+    using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+    // F3 uses 3 integer bits, range [-8, 8], the input range expected here.
+    using F3 = gemmlowp::FixedPoint<std::int16_t, 3>;
+
+    const F3 input = F3::FromRaw(input_data[i]);
+    F0 output = gemmlowp::logistic(input);
+    output_data[i] = output.raw();
+  }
+}
+
+// Quantized int8_t logistic activation.  Cheats by dequantizing and
+// requantizing around the floating point logistic method.  This implementation
+// is slow on platforms without a floating point unit.
+
+// TODO(b/141211002): Delete this int8_t implementation once we can reuse the
+// approach used in TFLite for int8_t Logistic.
+inline void Logistic(const RuntimeShape& input_shape, const int8_t* input_data,
+                     float input_scale, int input_zero_point,
+                     const RuntimeShape& output_shape, int8_t* output_data,
+                     float output_scale, int output_zero_point) {
+  const float cutoff_upper = 16.619047164916992188f;
+  const float cutoff_lower = -9.f;
+
+  const int flat_size = MatchingFlatSize(input_shape, output_shape);
+
+  // Rationale for using an approximation in the reference kernel:
+  // 0. The approximation gives enough precision for float.
+  // 1. It works around an issue on an embedded chipset where exp() does not
+  // behave as expected: on overflow, exp(x) should return inf (IEEE 754
+  // defines a representation for inf), not 1.701417.
+  // 2. It speeds up the calculation and matches the behavior of the
+  // optimized kernels (see the definition of scalar_logistic_op<float>).
+
+  for (int i = 0; i < flat_size; i++) {
+    // Dequantize.
+    float val =
+        static_cast<float>((input_data[i] - input_zero_point) * input_scale);
+    float result;
+    if (val > cutoff_upper) {
+      result = 1.0f;
+    } else if (val < cutoff_lower) {
+      result = std::exp(val);
+    } else {
+      result = 1.f / (1.f + std::exp(-val));
+    }
+    // Requantize
+    int8_t output =
+        static_cast<int8_t>(result / output_scale + output_zero_point);
+    output_data[i] = output;
+  }
+}
+
+}  // namespace reference_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOGISTIC_H_
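
A sketch of the dequantize/requantize int8_t path; the scales and zero points are arbitrary illustrative choices:

#include <cstdint>

#include "tensorflow/lite/kernels/internal/reference/logistic.h"

void LogisticInt8Example() {
  const tflite::RuntimeShape shape({1, 1, 1, 4});
  const int8_t input[4] = {-128, -64, 0, 127};
  int8_t output[4];
  // Assumed quantization: input spans roughly [-4, 4), output spans [0, 1).
  const float input_scale = 4.0f / 128.0f;
  const int input_zero_point = 0;
  const float output_scale = 1.0f / 256.0f;
  const int output_zero_point = -128;
  tflite::reference_ops::Logistic(shape, input, input_scale, input_zero_point,
                                  shape, output, output_scale,
                                  output_zero_point);
  // Each output entry is sigmoid(dequantized input), requantized to int8_t.
}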

+ 64 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/maximum_minimum.h

@@ -0,0 +1,64 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MAXIMUM_MINIMUM_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MAXIMUM_MINIMUM_H_
+
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+namespace reference_ops {
+
+template <typename T, typename Op, int N = 5>
+void MaximumMinimumBroadcastSlow(const RuntimeShape& unextended_input1_shape,
+                                 const T* input1_data,
+                                 const RuntimeShape& unextended_input2_shape,
+                                 const T* input2_data,
+                                 const RuntimeShape& unextended_output_shape,
+                                 T* output_data, Op op) {
+  // Uses element-wise calculation if broadcast is not required.
+  if (unextended_input1_shape == unextended_input2_shape) {
+    const int flat_size =
+        MatchingElementsSize(unextended_input1_shape, unextended_input2_shape,
+                             unextended_output_shape);
+    for (int i = 0; i < flat_size; ++i) {
+      output_data[i] = op(input1_data[i], input2_data[i]);
+    }
+  } else {
+    TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), N);
+    TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), N);
+    TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), N);
+
+    NdArrayDesc<N> desc1;
+    NdArrayDesc<N> desc2;
+    NdArrayDesc<N> output_desc;
+    NdArrayDescsForElementwiseBroadcast(
+        unextended_input1_shape, unextended_input2_shape, &desc1, &desc2);
+    CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape),
+                   &output_desc);
+
+    auto maxmin_func = [&](int indexes[N]) {
+      output_data[SubscriptToIndex(output_desc, indexes)] =
+          op(input1_data[SubscriptToIndex(desc1, indexes)],
+             input2_data[SubscriptToIndex(desc2, indexes)]);
+    };
+    NDOpsHelper<N>(output_desc, maxmin_func);
+  }
+}
+
+}  // namespace reference_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MAXIMUM_MINIMUM_H_
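
The Op parameter is any callable over two T values, so the same kernel serves both Maximum and Minimum; a sketch broadcasting a scalar against a 1x2x2x1 tensor (shapes and values invented for illustration):

#include <cstdint>

#include "tensorflow/lite/kernels/internal/reference/maximum_minimum.h"

void BroadcastMaximumExample() {
  const tflite::RuntimeShape input1_shape({1, 2, 2, 1});
  const tflite::RuntimeShape input2_shape({1, 1, 1, 1});  // Broadcast scalar.
  const int32_t input1[4] = {-3, 7, 0, 5};
  const int32_t input2[1] = {2};
  int32_t output[4];
  tflite::reference_ops::MaximumMinimumBroadcastSlow(
      input1_shape, input1, input2_shape, input2, input1_shape, output,
      [](int32_t a, int32_t b) { return a > b ? a : b; });
  // output is now {2, 7, 2, 5}.
}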

+ 166 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/mul.h

@@ -0,0 +1,166 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MUL_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MUL_H_
+
+#include "tensorflow/lite/kernels/internal/common.h"
+
+namespace tflite {
+
+namespace reference_ops {
+
+// Element-wise mul that can often be used for the inner loop of a broadcast
+// Mul as well as for the non-broadcast Mul.
+inline void MulElementwise(int size, const ArithmeticParams& params,
+                           const uint8_t* input1_data,
+                           const uint8_t* input2_data, uint8_t* output_data) {
+  for (int i = 0; i < size; ++i) {
+    const int32_t input1_val = params.input1_offset + input1_data[i];
+    const int32_t input2_val = params.input2_offset + input2_data[i];
+    const int32_t unclamped_result =
+        params.output_offset +
+        MultiplyByQuantizedMultiplier(input1_val * input2_val,
+                                      params.output_multiplier,
+                                      params.output_shift);
+    const int32_t clamped_output =
+        std::min(params.quantized_activation_max,
+                 std::max(params.quantized_activation_min, unclamped_result));
+    output_data[i] = static_cast<uint8_t>(clamped_output);
+  }
+}
+
+template <typename T>
+inline void Mul(const ArithmeticParams& params,
+                const RuntimeShape& input1_shape, const T* input1_data,
+                const RuntimeShape& input2_shape, const T* input2_data,
+                const RuntimeShape& output_shape, T* output_data) {
+  T output_activation_min;
+  T output_activation_max;
+  GetActivationParams(params, &output_activation_min, &output_activation_max);
+
+  const int flat_size =
+      MatchingFlatSize(input1_shape, input2_shape, output_shape);
+  for (int i = 0; i < flat_size; ++i) {
+    output_data[i] = ActivationFunctionWithMinMax(
+        input1_data[i] * input2_data[i], output_activation_min,
+        output_activation_max);
+  }
+}
+
+inline void Mul(const ArithmeticParams& params,
+                const RuntimeShape& input1_shape, const uint8_t* input1_data,
+                const RuntimeShape& input2_shape, const uint8_t* input2_data,
+                const RuntimeShape& output_shape, uint8_t* output_data) {
+  TFLITE_DCHECK_LE(params.quantized_activation_min,
+                   params.quantized_activation_max);
+  const int flat_size =
+      MatchingFlatSize(input1_shape, input2_shape, output_shape);
+
+  MulElementwise(flat_size, params, input1_data, input2_data, output_data);
+}
+
+inline void BroadcastMul4DSlow(const ArithmeticParams& params,
+                               const RuntimeShape& input1_shape,
+                               const uint8_t* input1_data,
+                               const RuntimeShape& input2_shape,
+                               const uint8_t* input2_data,
+                               const RuntimeShape& output_shape,
+                               uint8_t* output_data) {
+  NdArrayDesc<4> desc1;
+  NdArrayDesc<4> desc2;
+  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
+                                      &desc2);
+  const RuntimeShape extended_output_shape =
+      RuntimeShape::ExtendedShape(4, output_shape);
+
+  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
+    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
+      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
+        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
+          const int32_t input1_val =
+              params.input1_offset +
+              input1_data[SubscriptToIndex(desc1, b, y, x, c)];
+          const int32_t input2_val =
+              params.input2_offset +
+              input2_data[SubscriptToIndex(desc2, b, y, x, c)];
+          const int32_t unclamped_result =
+              params.output_offset +
+              MultiplyByQuantizedMultiplier(input1_val * input2_val,
+                                            params.output_multiplier,
+                                            params.output_shift);
+          const int32_t clamped_output = std::min(
+              params.quantized_activation_max,
+              std::max(params.quantized_activation_min, unclamped_result));
+          output_data[Offset(extended_output_shape, b, y, x, c)] =
+              static_cast<uint8_t>(clamped_output);
+        }
+      }
+    }
+  }
+}
+
+template <typename T>
+void BroadcastMul4DSlow(const ArithmeticParams& params,
+                        const RuntimeShape& unextended_input1_shape,
+                        const T* input1_data,
+                        const RuntimeShape& unextended_input2_shape,
+                        const T* input2_data,
+                        const RuntimeShape& unextended_output_shape,
+                        T* output_data) {
+  T output_activation_min;
+  T output_activation_max;
+  GetActivationParams(params, &output_activation_min, &output_activation_max);
+
+  TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
+  const RuntimeShape output_shape =
+      RuntimeShape::ExtendedShape(4, unextended_output_shape);
+
+  NdArrayDesc<4> desc1;
+  NdArrayDesc<4> desc2;
+  NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
+                                      unextended_input2_shape, &desc1, &desc2);
+
+  // In TensorFlow, the dimensions are canonically named (batch_number, row,
+  // col, channel), with extents (batches, height, width, depth), and with the
+  // trailing dimension changing most rapidly (channels has the smallest
+  // stride, typically 1 element).
+  //
+  // In generated C code, we store arrays with the dimensions reversed: the
+  // first dimension has the smallest stride.
+  //
+  // We name our variables by the TensorFlow convention, but generate C code
+  // nesting the loops such that the innermost loop has the smallest stride,
+  // for the best cache behavior.
+  for (int b = 0; b < output_shape.Dims(0); ++b) {
+    for (int y = 0; y < output_shape.Dims(1); ++y) {
+      for (int x = 0; x < output_shape.Dims(2); ++x) {
+        for (int c = 0; c < output_shape.Dims(3); ++c) {
+          output_data[Offset(output_shape, b, y, x, c)] =
+              ActivationFunctionWithMinMax(
+                  input1_data[SubscriptToIndex(desc1, b, y, x, c)] *
+                      input2_data[SubscriptToIndex(desc2, b, y, x, c)],
+                  output_activation_min, output_activation_max);
+        }
+      }
+    }
+  }
+}
+
+}  // namespace reference_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MUL_H_
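
A float usage sketch for the templated Mul; the activation bounds mimic a fused ReLU6 and are illustrative:

#include "tensorflow/lite/kernels/internal/reference/mul.h"

void MulFloatExample() {
  const tflite::RuntimeShape shape({1, 1, 1, 4});
  const float input1[4] = {1.0f, 2.0f, 3.0f, 4.0f};
  const float input2[4] = {0.5f, 0.5f, 0.5f, 0.5f};
  float output[4];
  tflite::ArithmeticParams params{};
  params.float_activation_min = 0.0f;  // Fused ReLU6 lower bound.
  params.float_activation_max = 6.0f;  // Fused ReLU6 upper bound.
  tflite::reference_ops::Mul(params, shape, input1, shape, input2, shape,
                             output);
  // output is now {0.5f, 1.0f, 1.5f, 2.0f}, clamped to [0, 6].
}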

+ 37 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/neg.h

@@ -0,0 +1,37 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_NEG_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_NEG_H_
+
+#include "tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+
+namespace reference_ops {
+
+template <typename T>
+inline void Negate(const RuntimeShape& input_shape, const T* input_data,
+                   const RuntimeShape& output_shape, T* output_data) {
+  const int flat_size = MatchingFlatSize(input_shape, output_shape);
+
+  for (int i = 0; i < flat_size; ++i) {
+    output_data[i] = -input_data[i];
+  }
+}
+
+}  // namespace reference_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_NEG_H_

+ 162 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/pad.h

@@ -0,0 +1,162 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PAD_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PAD_H_
+
+#include <vector>
+
+#include "tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+
+namespace reference_ops {
+
+// TFLite Pad supports activation tensors with up to 4 dimensions.
+constexpr int PadKernelMaxDimensionCount() { return 4; }
+
+// There are two versions of pad: Pad and PadV2. In PadV2 a second scalar
+// input provides the padding value, so pad_value_ptr can simply point at that
+// input's data. For Pad, it should point to a zero value.
+//
+// Note that two typenames are required so that the T=P=int32_t case is a
+// specialization distinct from the case where only P=int32_t.
+template <typename T, typename P>
+inline void PadImpl(const tflite::PadParams& op_params,
+                    const RuntimeShape& input_shape, const T* input_data,
+                    const P* pad_value_ptr, const RuntimeShape& output_shape,
+                    T* output_data) {
+  const RuntimeShape ext_input_shape =
+      RuntimeShape::ExtendedShape(PadKernelMaxDimensionCount(), input_shape);
+  const RuntimeShape ext_output_shape =
+      RuntimeShape::ExtendedShape(PadKernelMaxDimensionCount(), output_shape);
+  TFLITE_DCHECK_LE(op_params.left_padding_count, PadKernelMaxDimensionCount());
+  TFLITE_DCHECK_LE(op_params.right_padding_count, PadKernelMaxDimensionCount());
+
+  // Runtime calls are currently fixed at 4 dimensions. Copy inputs so we can
+  // pad them to 4 dims (yes, we are "padding the padding").
+  int left_padding_copy[PadKernelMaxDimensionCount()];
+  for (int i = 0; i < PadKernelMaxDimensionCount(); i++) {
+    left_padding_copy[i] = 0;
+  }
+  for (int i = 0; i < op_params.left_padding_count; ++i) {
+    left_padding_copy[i + PadKernelMaxDimensionCount() -
+                      op_params.left_padding_count] = op_params.left_padding[i];
+  }
+  int right_padding_copy[PadKernelMaxDimensionCount()];
+  for (int i = 0; i < PadKernelMaxDimensionCount(); i++) {
+    right_padding_copy[i] = 0;
+  }
+  for (int i = 0; i < op_params.right_padding_count; ++i) {
+    right_padding_copy[i + PadKernelMaxDimensionCount() -
+                       op_params.right_padding_count] =
+        op_params.right_padding[i];
+  }
+
+  const int output_batch = ext_output_shape.Dims(0);
+  const int output_height = ext_output_shape.Dims(1);
+  const int output_width = ext_output_shape.Dims(2);
+  const int output_depth = ext_output_shape.Dims(3);
+
+  const int left_b_padding = left_padding_copy[0];
+  const int left_h_padding = left_padding_copy[1];
+  const int left_w_padding = left_padding_copy[2];
+  const int left_d_padding = left_padding_copy[3];
+
+  const int right_b_padding = right_padding_copy[0];
+  const int right_h_padding = right_padding_copy[1];
+  const int right_w_padding = right_padding_copy[2];
+  const int right_d_padding = right_padding_copy[3];
+
+  const T pad_value = *pad_value_ptr;
+
+  const T* in_ptr = input_data;
+  T* out_ptr = output_data;
+  for (int out_b = 0; out_b < output_batch; ++out_b) {
+    for (int out_h = 0; out_h < output_height; ++out_h) {
+      for (int out_w = 0; out_w < output_width; ++out_w) {
+        for (int out_d = 0; out_d < output_depth; ++out_d) {
+          if (out_b < left_b_padding ||
+              out_b >= output_batch - right_b_padding ||
+              out_h < left_h_padding ||
+              out_h >= output_height - right_h_padding ||
+              out_w < left_w_padding ||
+              out_w >= output_width - right_w_padding ||
+              out_d < left_d_padding ||
+              out_d >= output_depth - right_d_padding) {
+            *out_ptr++ = pad_value;
+          } else {
+            *out_ptr++ = *in_ptr++;
+          }
+        }
+      }
+    }
+  }
+}
+
+template <typename T, typename P>
+inline void Pad(const tflite::PadParams& op_params,
+                const RuntimeShape& input_shape, const T* input_data,
+                const P* pad_value_ptr, const RuntimeShape& output_shape,
+                T* output_data) {
+  PadImpl(op_params, input_shape, input_data, pad_value_ptr, output_shape,
+          output_data);
+}
+
+// The second (pad-value) input can be int32_t when, say, the first is uint8_t.
+template <typename T>
+inline void Pad(const tflite::PadParams& op_params,
+                const RuntimeShape& input_shape, const T* input_data,
+                const int32_t* pad_value_ptr, const RuntimeShape& output_shape,
+                T* output_data) {
+  const T converted_pad_value = static_cast<T>(*pad_value_ptr);
+  PadImpl(op_params, input_shape, input_data, &converted_pad_value,
+          output_shape, output_data);
+}
+
+// This version avoids conflicting template matching.
+template <>
+inline void Pad(const tflite::PadParams& op_params,
+                const RuntimeShape& input_shape, const int32_t* input_data,
+                const int32_t* pad_value_ptr, const RuntimeShape& output_shape,
+                int32_t* output_data) {
+  PadImpl(op_params, input_shape, input_data, pad_value_ptr, output_shape,
+          output_data);
+}
+
+template <typename T, typename P>
+inline void PadImageStyle(const tflite::PadParams& op_params,
+                          const RuntimeShape& input_shape, const T* input_data,
+                          const P* pad_value_ptr,
+                          const RuntimeShape& output_shape, T* output_data) {
+  Pad(op_params, input_shape, input_data, pad_value_ptr, output_shape,
+      output_data);
+}
+
+template <typename P>
+inline void PadImageStyle(const tflite::PadParams& op_params,
+                          const RuntimeShape& input_shape,
+                          const float* input_data, const P* pad_value_ptr,
+                          const RuntimeShape& output_shape,
+                          float* output_data) {
+  Pad(op_params, input_shape, input_data, pad_value_ptr, output_shape,
+      output_data);
+}
+
+}  // namespace reference_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PAD_H_
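
A sketch of how the left/right padding arrays map onto PadImpl: a 1x2x2x1 float tensor padded by one row and one column on each spatial side (all shapes and values illustrative):

#include <cstdint>

#include "tensorflow/lite/kernels/internal/reference/pad.h"

void PadFloatExample() {
  const tflite::RuntimeShape input_shape({1, 2, 2, 1});
  const tflite::RuntimeShape output_shape({1, 4, 4, 1});
  const float input[4] = {1.0f, 2.0f, 3.0f, 4.0f};
  float output[16];
  const float pad_value = 0.0f;

  tflite::PadParams op_params{};
  op_params.left_padding_count = 4;
  op_params.right_padding_count = 4;
  // Per-dimension (batch, height, width, channel) padding amounts.
  const int32_t paddings[4] = {0, 1, 1, 0};
  for (int i = 0; i < 4; ++i) {
    op_params.left_padding[i] = paddings[i];
    op_params.right_padding[i] = paddings[i];
  }

  tflite::reference_ops::Pad(op_params, input_shape, input, &pad_value,
                             output_shape, output);
  // output is the 2x2 input block centered in a 4x4 grid of zeros.
}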

+ 297 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/pooling.h

@@ -0,0 +1,297 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_POOLING_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_POOLING_H_
+
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/cppmath.h"
+#include "tensorflow/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+namespace reference_ops {
+
+inline void AveragePool(const PoolParams& params,
+                        const RuntimeShape& input_shape,
+                        const float* input_data,
+                        const RuntimeShape& output_shape, float* output_data) {
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
+  const int input_height = input_shape.Dims(1);
+  const int input_width = input_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_width = output_shape.Dims(2);
+  const int stride_height = params.stride_height;
+  const int stride_width = params.stride_width;
+  for (int batch = 0; batch < batches; ++batch) {
+    for (int out_y = 0; out_y < output_height; ++out_y) {
+      for (int out_x = 0; out_x < output_width; ++out_x) {
+        for (int channel = 0; channel < depth; ++channel) {
+          const int in_x_origin =
+              (out_x * stride_width) - params.padding_values.width;
+          const int in_y_origin =
+              (out_y * stride_height) - params.padding_values.height;
+          // Compute the boundaries of the filter region clamped so as to
+          // ensure that the filter window fits in the input array.
+          const int filter_x_start = std::max(0, -in_x_origin);
+          const int filter_x_end =
+              std::min(params.filter_width, input_width - in_x_origin);
+          const int filter_y_start = std::max(0, -in_y_origin);
+          const int filter_y_end =
+              std::min(params.filter_height, input_height - in_y_origin);
+          float total = 0.f;
+          float filter_count = 0;
+          for (int filter_y = filter_y_start; filter_y < filter_y_end;
+               ++filter_y) {
+            for (int filter_x = filter_x_start; filter_x < filter_x_end;
+                 ++filter_x) {
+              const int in_x = in_x_origin + filter_x;
+              const int in_y = in_y_origin + filter_y;
+              total +=
+                  input_data[Offset(input_shape, batch, in_y, in_x, channel)];
+              filter_count++;
+            }
+          }
+          const float average = total / filter_count;
+          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
+              ActivationFunctionWithMinMax(average, params.float_activation_min,
+                                           params.float_activation_max);
+        }
+      }
+    }
+  }
+}
+
+inline void AveragePool(const PoolParams& params,
+                        const RuntimeShape& input_shape,
+                        const uint8_t* input_data,
+                        const RuntimeShape& output_shape,
+                        uint8_t* output_data) {
+  TFLITE_DCHECK_LE(params.quantized_activation_min,
+                   params.quantized_activation_max);
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
+  const int input_height = input_shape.Dims(1);
+  const int input_width = input_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_width = output_shape.Dims(2);
+  const int stride_height = params.stride_height;
+  const int stride_width = params.stride_width;
+  for (int batch = 0; batch < batches; ++batch) {
+    for (int out_y = 0; out_y < output_height; ++out_y) {
+      for (int out_x = 0; out_x < output_width; ++out_x) {
+        for (int channel = 0; channel < depth; ++channel) {
+          const int in_x_origin =
+              (out_x * stride_width) - params.padding_values.width;
+          const int in_y_origin =
+              (out_y * stride_height) - params.padding_values.height;
+          // Compute the boundaries of the filter region clamped so as to
+          // ensure that the filter window fits in the input array.
+          const int filter_x_start = std::max(0, -in_x_origin);
+          const int filter_x_end =
+              std::min(params.filter_width, input_width - in_x_origin);
+          const int filter_y_start = std::max(0, -in_y_origin);
+          const int filter_y_end =
+              std::min(params.filter_height, input_height - in_y_origin);
+          int32_t acc = 0;
+          int filter_count = 0;
+          for (int filter_y = filter_y_start; filter_y < filter_y_end;
+               ++filter_y) {
+            for (int filter_x = filter_x_start; filter_x < filter_x_end;
+                 ++filter_x) {
+              const int in_x = in_x_origin + filter_x;
+              const int in_y = in_y_origin + filter_y;
+              acc +=
+                  input_data[Offset(input_shape, batch, in_y, in_x, channel)];
+              filter_count++;
+            }
+          }
+          // Round to the nearest integer; acc is non-negative for uint8_t
+          // inputs, so no sign handling is needed.
+          acc = (acc + filter_count / 2) / filter_count;
+          acc = std::max(acc, params.quantized_activation_min);
+          acc = std::min(acc, params.quantized_activation_max);
+          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
+              static_cast<uint8_t>(acc);
+        }
+      }
+    }
+  }
+}
+
+inline void L2Pool(const PoolParams& params, const RuntimeShape& input_shape,
+                   const float* input_data, const RuntimeShape& output_shape,
+                   float* output_data) {
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
+  const int input_height = input_shape.Dims(1);
+  const int input_width = input_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_width = output_shape.Dims(2);
+  const int stride_height = params.stride_height;
+  const int stride_width = params.stride_width;
+  for (int batch = 0; batch < batches; ++batch) {
+    for (int out_y = 0; out_y < output_height; ++out_y) {
+      for (int out_x = 0; out_x < output_width; ++out_x) {
+        for (int channel = 0; channel < depth; ++channel) {
+          const int in_x_origin =
+              (out_x * stride_width) - params.padding_values.width;
+          const int in_y_origin =
+              (out_y * stride_height) - params.padding_values.height;
+          // Compute the boundaries of the filter region clamped so as to
+          // ensure that the filter window fits in the input array.
+          const int filter_x_start = std::max(0, -in_x_origin);
+          const int filter_x_end =
+              std::min(params.filter_width, input_width - in_x_origin);
+          const int filter_y_start = std::max(0, -in_y_origin);
+          const int filter_y_end =
+              std::min(params.filter_height, input_height - in_y_origin);
+          float sum_squares = 0.f;
+          int filter_count = 0;
+          for (int filter_y = filter_y_start; filter_y < filter_y_end;
+               ++filter_y) {
+            for (int filter_x = filter_x_start; filter_x < filter_x_end;
+                 ++filter_x) {
+              const int in_x = in_x_origin + filter_x;
+              const int in_y = in_y_origin + filter_y;
+              const float val =
+                  input_data[Offset(input_shape, batch, in_y, in_x, channel)];
+              sum_squares += val * val;
+              filter_count++;
+            }
+          }
+          const float l2pool_result = std::sqrt(sum_squares / filter_count);
+          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
+              ActivationFunctionWithMinMax(l2pool_result,
+                                           params.float_activation_min,
+                                           params.float_activation_max);
+        }
+      }
+    }
+  }
+}
+
+inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
+                    const float* input_data, const RuntimeShape& output_shape,
+                    float* output_data) {
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
+  const int input_height = input_shape.Dims(1);
+  const int input_width = input_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_width = output_shape.Dims(2);
+  const int stride_height = params.stride_height;
+  const int stride_width = params.stride_width;
+  for (int batch = 0; batch < batches; ++batch) {
+    for (int out_y = 0; out_y < output_height; ++out_y) {
+      for (int out_x = 0; out_x < output_width; ++out_x) {
+        for (int channel = 0; channel < depth; ++channel) {
+          const int in_x_origin =
+              (out_x * stride_width) - params.padding_values.width;
+          const int in_y_origin =
+              (out_y * stride_height) - params.padding_values.height;
+          // Compute the boundaries of the filter region clamped so as to
+          // ensure that the filter window fits in the input array.
+          const int filter_x_start = std::max(0, -in_x_origin);
+          const int filter_x_end =
+              std::min(params.filter_width, input_width - in_x_origin);
+          const int filter_y_start = std::max(0, -in_y_origin);
+          const int filter_y_end =
+              std::min(params.filter_height, input_height - in_y_origin);
+          float max = std::numeric_limits<float>::lowest();
+          for (int filter_y = filter_y_start; filter_y < filter_y_end;
+               ++filter_y) {
+            for (int filter_x = filter_x_start; filter_x < filter_x_end;
+                 ++filter_x) {
+              const int in_x = in_x_origin + filter_x;
+              const int in_y = in_y_origin + filter_y;
+              max = std::max(
+                  max,
+                  input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
+            }
+          }
+          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
+              ActivationFunctionWithMinMax(max, params.float_activation_min,
+                                           params.float_activation_max);
+        }
+      }
+    }
+  }
+}
+
+inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
+                    const uint8_t* input_data, const RuntimeShape& output_shape,
+                    uint8_t* output_data) {
+  TFLITE_DCHECK_LE(params.quantized_activation_min,
+                   params.quantized_activation_max);
+  TFLITE_DCHECK_GE(params.quantized_activation_min, 0);
+  TFLITE_DCHECK_LE(params.quantized_activation_max, 255);
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
+  const int input_height = input_shape.Dims(1);
+  const int input_width = input_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_width = output_shape.Dims(2);
+  const int stride_height = params.stride_height;
+  const int stride_width = params.stride_width;
+  for (int batch = 0; batch < batches; ++batch) {
+    for (int out_y = 0; out_y < output_height; ++out_y) {
+      for (int out_x = 0; out_x < output_width; ++out_x) {
+        for (int channel = 0; channel < depth; ++channel) {
+          const int in_x_origin =
+              (out_x * stride_width) - params.padding_values.width;
+          const int in_y_origin =
+              (out_y * stride_height) - params.padding_values.height;
+          // Compute the boundaries of the filter region clamped so as to
+          // ensure that the filter window fits in the input array.
+          const int filter_x_start = std::max(0, -in_x_origin);
+          const int filter_x_end =
+              std::min(params.filter_width, input_width - in_x_origin);
+          const int filter_y_start = std::max(0, -in_y_origin);
+          const int filter_y_end =
+              std::min(params.filter_height, input_height - in_y_origin);
+          uint8_t max = 0;
+          for (int filter_y = filter_y_start; filter_y < filter_y_end;
+               ++filter_y) {
+            for (int filter_x = filter_x_start; filter_x < filter_x_end;
+                 ++filter_x) {
+              const int in_x = in_x_origin + filter_x;
+              const int in_y = in_y_origin + filter_y;
+              max = std::max(
+                  max,
+                  input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
+            }
+          }
+          max = std::max<uint8_t>(max, params.quantized_activation_min);
+          max = std::min<uint8_t>(max, params.quantized_activation_max);
+          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
+              static_cast<uint8_t>(max);
+        }
+      }
+    }
+  }
+}
+}  // namespace reference_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_POOLING_H_
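
L2Pool is the one kernel in this header with no quantized counterpart; a float sketch with a single 2x2 window (shape and values illustrative):

#include <limits>

#include "tensorflow/lite/kernels/internal/reference/pooling.h"

void L2PoolExample() {
  const tflite::RuntimeShape input_shape({1, 2, 2, 1});
  const tflite::RuntimeShape output_shape({1, 1, 1, 1});
  const float input[4] = {1.0f, 2.0f, 3.0f, 4.0f};
  float output[1];

  tflite::PoolParams params{};
  params.stride_height = 1;
  params.stride_width = 1;
  params.filter_height = 2;
  params.filter_width = 2;
  params.padding_values.height = 0;
  params.padding_values.width = 0;
  params.float_activation_min = std::numeric_limits<float>::lowest();
  params.float_activation_max = std::numeric_limits<float>::max();

  tflite::reference_ops::L2Pool(params, input_shape, input, output_shape,
                                output);
  // output[0] == sqrt((1 + 4 + 9 + 16) / 4) ~= 2.739.
}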

+ 109 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/prelu.h

@@ -0,0 +1,109 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PRELU_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PRELU_H_
+
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/compatibility.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+
+namespace reference_ops {
+
+// Broadcast prelu to output_shape for quantized uint8_t/int8_t data.
+template <typename T>
+inline void BroadcastPrelu4DSlow(
+    const PreluParams& params, const RuntimeShape& input_shape,
+    const T* input_data, const RuntimeShape& alpha_shape, const T* alpha_data,
+    const RuntimeShape& output_shape, T* output_data) {
+  TFLITE_DCHECK_LE(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(alpha_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(output_shape.DimensionsCount(), 4);
+  const RuntimeShape extended_output_shape =
+      RuntimeShape::ExtendedShape(4, output_shape);
+  NdArrayDesc<4> desc1;
+  NdArrayDesc<4> desc2;
+  NdArrayDescsForElementwiseBroadcast(input_shape, alpha_shape, &desc1, &desc2);
+
+  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
+    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
+      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
+        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
+          int output_index = Offset(extended_output_shape, b, y, x, c);
+          int input_index = SubscriptToIndex(desc1, b, y, x, c);
+          const int32_t input_value =
+              params.input_offset + input_data[input_index];
+          int32_t output_value;
+          if (input_value >= 0) {
+            output_value = MultiplyByQuantizedMultiplier(
+                input_value, params.output_multiplier_1, params.output_shift_1);
+          } else {
+            auto alpha_index = SubscriptToIndex(desc2, b, y, x, c);
+            const int32_t alpha_value =
+                params.alpha_offset + alpha_data[alpha_index];
+
+            output_value = MultiplyByQuantizedMultiplier(
+                input_value * alpha_value, params.output_multiplier_2,
+                params.output_shift_2);
+          }
+          output_value += params.output_offset;
+
+          const int32_t quantized_min = std::numeric_limits<T>::min();
+          const int32_t quantized_max = std::numeric_limits<T>::max();
+          const int32_t clamped_output =
+              std::min(quantized_max, std::max(quantized_min, output_value));
+          output_data[output_index] = static_cast<T>(clamped_output);
+        }
+      }
+    }
+  }
+}
+
+template <typename T>
+inline void Prelu(const PreluParams& params, const RuntimeShape& input_shape,
+                  const T* input_data, const RuntimeShape& alpha_shape,
+                  const T* alpha_data, const RuntimeShape& output_shape,
+                  T* output_data) {
+  const int32_t quantized_min = std::numeric_limits<T>::min();
+  const int32_t quantized_max = std::numeric_limits<T>::max();
+
+  const int flat_size =
+      MatchingElementsSize(input_shape, alpha_shape, output_shape);
+  for (int i = 0; i < flat_size; ++i) {
+    const int32_t input_value = params.input_offset + input_data[i];
+    int32_t output_value;
+    if (input_value >= 0) {
+      output_value = MultiplyByQuantizedMultiplier(
+          input_value, params.output_multiplier_1, params.output_shift_1);
+    } else {
+      const int32_t alpha_value = params.alpha_offset + alpha_data[i];
+
+      output_value = MultiplyByQuantizedMultiplier(input_value * alpha_value,
+                                                   params.output_multiplier_2,
+                                                   params.output_shift_2);
+    }
+    output_value += params.output_offset;
+
+    const int32_t clamped_output =
+        std::min(quantized_max, std::max(quantized_min, output_value));
+    output_data[i] = static_cast<T>(clamped_output);
+  }
+}
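+
+// Usage sketch: the caller precomputes the fixed-point multipliers in
+// PreluParams; the concrete values below are illustrative placeholders only.
+//
+//   PreluParams params;
+//   params.input_offset = -128;        // negated input zero point
+//   params.alpha_offset = -128;        // negated alpha zero point
+//   params.output_offset = 0;          // output zero point
+//   params.output_multiplier_1 = ...;  // fixed-point scale for x >= 0
+//   params.output_shift_1 = ...;
+//   params.output_multiplier_2 = ...;  // fixed-point scale for x < 0
+//   params.output_shift_2 = ...;
+//   Prelu<int8_t>(params, shape, input, shape, alpha, shape, output);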
+
+}  // namespace reference_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PRELU_H_

+ 138 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h

@@ -0,0 +1,138 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PROCESS_BROADCAST_SHAPES_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PROCESS_BROADCAST_SHAPES_H_
+
+#include "tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+
+namespace reference_ops {
+
+// Consolidates dimensions in broadcast inputs, checks for five-fold pattern.
+//
+// For example, if sequence of dimensions of one input is
+// ..., 1, 3, 1, 7, 9, 5,... and the other is ..., 2, 3, 1, 7, 1, 1, ...
+// we can consolidate these as
+// ..., 1, 3*7, 9*5, ... and 2, 3*7, 1.
+//
+// The category is updated in the less-frequent case of shapes that are
+// not suited to a fivefold-loop broadcast.
+//
+// Falls back to the generic pattern when the shapes do not fit a fivefold loop.
+//
+// Returns true iff there is some sort of broadcast, which includes five-fold
+// patterns and falling back to generic broadcast.
+inline bool ProcessBroadcastShapes(const RuntimeShape& shape0,
+                                   const RuntimeShape& shape1,
+                                   tflite::ArithmeticParams* params) {
+  const int dims_count =
+      std::max(shape0.DimensionsCount(), shape1.DimensionsCount());
+
+  params->broadcast_category = BroadcastableOpCategory::kGenericBroadcast;
+  RuntimeShape scalar_shape(dims_count, 1);
+
+  auto extended_shape0 = RuntimeShape::ExtendedShape(dims_count, shape0);
+  auto extended_shape1 = RuntimeShape::ExtendedShape(dims_count, shape1);
+
+  // Check for "exact" match, implicitly accepting any scalar shapes.
+  if (extended_shape0 == extended_shape1) {
+    params->broadcast_category = BroadcastableOpCategory::kNonBroadcast;
+    return false;
+  }
+
+  for (int i = dims_count - 1; i >= 0; --i) {
+    if (extended_shape0.Dims(i) == extended_shape1.Dims(i)) {
+      continue;
+    } else if (extended_shape0.Dims(i) == 1) {
+      params->broadcast_category =
+          BroadcastableOpCategory::kFirstInputBroadcastsFast;
+      break;
+    } else if (extended_shape1.Dims(i) == 1) {
+      params->broadcast_category =
+          BroadcastableOpCategory::kSecondInputBroadcastsFast;
+      break;
+    } else {
+      // This case is erroneous: there is a dimension that does not match and
+      // is not a broadcast from one shape to the other.
+      params->broadcast_category = BroadcastableOpCategory::kGenericBroadcast;
+      return true;
+    }
+  }
+
+  if (params->broadcast_category !=
+          BroadcastableOpCategory::kFirstInputBroadcastsFast &&
+      params->broadcast_category !=
+          BroadcastableOpCategory::kSecondInputBroadcastsFast) {
+    // This is unreachable because at least one else clause in the above loop
+    // must be reached.
+    TFLITE_DCHECK(false);
+    params->broadcast_category = BroadcastableOpCategory::kNonBroadcast;
+    return false;
+  }
+
+  // From this point it is assumed contractually that corresponding dimensions
+  // in shape0 and shape1 are either (a) equal or (b) one or other equals 1.
+  const bool swap_inputs = params->broadcast_category ==
+                           BroadcastableOpCategory::kSecondInputBroadcastsFast;
+  const RuntimeShape* shape_a =
+      swap_inputs ? &extended_shape1 : &extended_shape0;
+  const RuntimeShape* shape_b =
+      swap_inputs ? &extended_shape0 : &extended_shape1;
+
+  int i = dims_count - 1;
+  params->broadcast_shape[0] = 1;
+  params->broadcast_shape[1] = 1;
+  params->broadcast_shape[2] = 1;
+  params->broadcast_shape[3] = 1;
+  params->broadcast_shape[4] = 1;
+  // y_0 is greedy: include dims if both or neither equal 1: in other words,
+  // test for equality rather than (shape_a->Dims(i) != 1).
+  while (i >= 0 && shape_a->Dims(i) == shape_b->Dims(i)) {
+    params->broadcast_shape[4] *= shape_b->Dims(i);
+    --i;
+  }
+  // Here either input_a or input_b has dim of 1 (if i >= 0).  If it is input_b
+  // that has the unit dimension, the next two loops are not entered.
+  while (i >= 0 && shape_a->Dims(i) == 1) {
+    params->broadcast_shape[3] *= shape_b->Dims(i);
+    --i;
+  }
+  while (i >= 0 && shape_a->Dims(i) == shape_b->Dims(i)) {
+    params->broadcast_shape[2] *= shape_a->Dims(i);
+    --i;
+  }
+  // Here either input_a or input_b has dim of 1 (if i >= 0).
+  while (i >= 0 && shape_b->Dims(i) == 1) {
+    params->broadcast_shape[1] *= shape_a->Dims(i);
+    --i;
+  }
+  while (i >= 0 && shape_a->Dims(i) == shape_b->Dims(i)) {
+    params->broadcast_shape[0] *= shape_b->Dims(i);
+    --i;
+  }
+
+  // The rarer case is when the broadcast dimensions cannot be handled by a
+  // fivefold loop.
+  if (i >= 0) {
+    params->broadcast_category = BroadcastableOpCategory::kGenericBroadcast;
+  }
+  return true;
+}
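+
+// Worked example: shape0 = {8, 1, 6, 1} and shape1 = {7, 1, 5} extend to
+// {8, 1, 6, 1} and {1, 7, 1, 5}; every dimension pair is equal or contains a
+// 1, so a fast broadcast category is selected and broadcast_shape[] holds the
+// consolidated run lengths. For {2, 3, 5} vs {2, 4, 5} the mismatched middle
+// dimension cannot be broadcast, so the category stays kGenericBroadcast and
+// the function returns true.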
+
+}  // namespace reference_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PROCESS_BROADCAST_SHAPES_H_

+ 55 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/quantize.h

@@ -0,0 +1,55 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_QUANTIZE_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_QUANTIZE_H_
+
+#include <algorithm>
+#include <limits>
+
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/compatibility.h"
+#include "tensorflow/lite/kernels/internal/cppmath.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+
+namespace reference_ops {
+
+template <typename InputT, typename OutputT>
+inline void AffineQuantize(const tflite::QuantizationParams& op_params,
+                           const RuntimeShape& input_shape,
+                           const InputT* input_data,
+                           const RuntimeShape& output_shape,
+                           OutputT* output_data) {
+  const int32_t zero_point = op_params.zero_point;
+  const double scale = op_params.scale;
+  const int flat_size = MatchingFlatSize(input_shape, output_shape);
+  static constexpr int32_t min_val = std::numeric_limits<OutputT>::min();
+  static constexpr int32_t max_val = std::numeric_limits<OutputT>::max();
+
+  for (int i = 0; i < flat_size; i++) {
+    const InputT val = input_data[i];
+    int32_t unclamped =
+        static_cast<int32_t>(TfLiteRound(val / static_cast<float>(scale))) +
+        zero_point;
+    const int32_t clamped = std::min(std::max(unclamped, min_val), max_val);
+    output_data[i] = static_cast<OutputT>(clamped);
+  }
+}
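+
+// Worked example: with scale = 0.5 and zero_point = -1, an input of 2.0f
+// quantizes to round(2.0 / 0.5) + (-1) = 3, which is then clamped to the
+// numeric range of OutputT (e.g. [-128, 127] for int8_t).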
+
+}  // namespace reference_ops
+
+}  // namespace tflite
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_QUANTIZE_H_

+ 405 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/reduce.h

@@ -0,0 +1,405 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REDUCE_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REDUCE_H_
+
+#include "ruy/profiler/instrumentation.h"  // from @ruy
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/cppmath.h"
+#include "tensorflow/lite/kernels/internal/max.h"
+#include "tensorflow/lite/kernels/internal/min.h"
+#include "tensorflow/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+
+namespace reference_ops {
+
+// A generic reduce method that can be used for reduce_sum, reduce_mean, etc.
+// This method iterates through the input data and reduces elements along the
+// dimensions given in axis.
+template <typename In, typename Out>
+inline bool Reduce(const In* input_data, const int* input_dims,
+                   const int* output_dims, const int input_num_dims,
+                   const int output_num_dims, const int* axis,
+                   const int num_axis, int* input_iter,
+                   Out reducer(const Out current, const In in),
+                   Out* output_data) {
+  // Reset input iterator.
+  for (int idx = 0; idx < input_num_dims; ++idx) {
+    input_iter[idx] = 0;
+  }
+  // Iterate through input_data.
+  do {
+    size_t input_offset =
+        ReducedOutputOffset(input_num_dims, input_dims, input_iter, 0, nullptr);
+    size_t output_offset = ReducedOutputOffset(input_num_dims, input_dims,
+                                               input_iter, num_axis, axis);
+    output_data[output_offset] =
+        reducer(output_data[output_offset], input_data[input_offset]);
+  } while (NextIndex(input_num_dims, input_dims, input_iter));
+  return true;
+}
+
+// This method parses the input 'axis' to remove duplicates and handle negative
+// values, and returns a valid 'out_axis'.
+inline bool ResolveAxis(const int num_dims, const int* axis,
+                        const int64_t num_axis, int* out_axis,
+                        int* out_num_axis) {
+  *out_num_axis = 0;  // Just in case.
+  // Short-circuit axis resolution for scalars; the axis will go unused.
+  if (num_dims == 0) {
+    return true;
+  }
+  // O(n^2) is fine since out_num_axis should be really small, typically <= 4.
+  for (int64_t idx = 0; idx < num_axis; ++idx) {
+    // Handle negative index. A positive index 'p_idx' can be represented as a
+    // negative index 'n_idx' as: n_idx = p_idx - num_dims.
+    // e.g. for num_dims=3, [0, 1, 2] is the same as [-3, -2, -1].
+    int current = axis[idx] < 0 ? (axis[idx] + num_dims) : axis[idx];
+    TFLITE_DCHECK(current >= 0 && current < num_dims);
+    bool is_dup = false;
+    for (int j = 0; j < *out_num_axis; ++j) {
+      if (out_axis[j] == current) {
+        is_dup = true;
+        break;
+      }
+    }
+    if (!is_dup) {
+      out_axis[*out_num_axis] = current;
+      *out_num_axis += 1;
+    }
+  }
+  return true;
+}
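+
+// Example: with num_dims = 3 and axis = {-1, 2, 0}, the negative index -1
+// resolves to 2, the duplicate 2 is dropped, and out_axis becomes {2, 0}
+// with *out_num_axis == 2.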
+
+// This method expects that output_data has been initialized.
+template <typename In, typename Out>
+inline bool ReduceSumImpl(const In* input_data, const int* input_dims,
+                          const int* output_dims, const int input_num_dims,
+                          const int output_num_dims, const int* axis,
+                          const int num_axis, int* input_iter,
+                          Out* output_data) {
+  auto reducer = [](const Out current, const In in) -> Out {
+    const Out actual_in = static_cast<Out>(in);
+    return current + actual_in;
+  };
+  return Reduce<In, Out>(input_data, input_dims, output_dims, input_num_dims,
+                         output_num_dims, axis, num_axis, input_iter, reducer,
+                         output_data);
+}
+
+template <typename T>
+inline bool InitTensorDataForReduce(const int* dims, const int num_dims,
+                                    const T init_value, T* data) {
+  size_t num_elements = 1;
+  for (int idx = 0; idx < num_dims; ++idx) {
+    size_t current = static_cast<size_t>(dims[idx]);
+    // Overflow prevention.
+    if (num_elements > std::numeric_limits<size_t>::max() / current) {
+      return false;
+    }
+    num_elements *= current;
+  }
+  for (size_t idx = 0; idx < num_elements; ++idx) {
+    data[idx] = init_value;
+  }
+  return true;
+}
+
+// Computes the generic value (i.e., sum/max/min/prod) of elements across
+// dimensions given in axis. The caller must pass in init_value and a reducer.
+template <typename T>
+inline bool ReduceGeneric(const T* input_data, const int* input_dims,
+                          const int input_num_dims, T* output_data,
+                          const int* output_dims, const int output_num_dims,
+                          const int* axis, const int64_t num_axis_dimensions,
+                          bool keep_dims, int* temp_index, int* resolved_axis,
+                          T init_value,
+                          T reducer(const T current, const T in)) {
+  // Reset output data.
+  if (!InitTensorDataForReduce(output_dims, output_num_dims, init_value,
+                               output_data)) {
+    return false;
+  }
+
+  // Resolve axis.
+  int num_resolved_axis = 0;
+  if (!ResolveAxis(input_num_dims, axis, num_axis_dimensions, resolved_axis,
+                   &num_resolved_axis)) {
+    return false;
+  }
+
+  return Reduce<T, T>(input_data, input_dims, output_dims, input_num_dims,
+                      output_num_dims, resolved_axis, num_resolved_axis,
+                      temp_index, reducer, output_data);
+}
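+
+// Usage sketch (illustrative; assumes the caller allocated the temp_index and
+// resolved_axis scratch arrays): a reduce-max over axis 1 of a 2x3 tensor
+// could look like
+//
+//   auto max_reducer = [](const int32_t current, const int32_t in) {
+//     return in > current ? in : current;
+//   };
+//   const int axis[] = {1};
+//   ReduceGeneric<int32_t>(input, input_dims, 2, output, output_dims, 1,
+//                          axis, 1, /*keep_dims=*/false, temp_index,
+//                          resolved_axis,
+//                          std::numeric_limits<int32_t>::lowest(),
+//                          max_reducer);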
+
+// Computes the mean of elements across dimensions given in axis.
+// It does so in two stages: first it calculates the sum of elements along the
+// axis, then divides it by the number of elements in the axis.
+template <typename T, typename U>
+inline bool Mean(const T* input_data, const int* input_dims,
+                 const int input_num_dims, T* output_data,
+                 const int* output_dims, const int output_num_dims,
+                 const int* axis, const int num_axis_dimensions, bool keep_dims,
+                 int* temp_index, int* resolved_axis, U* temp_sum) {
+  ruy::profiler::ScopeLabel label("Mean");
+  // Reset output data.
+  size_t num_outputs = 1;
+  for (int idx = 0; idx < output_num_dims; ++idx) {
+    size_t current = static_cast<size_t>(output_dims[idx]);
+    // Overflow prevention.
+    if (num_outputs > std::numeric_limits<size_t>::max() / current) {
+      return false;
+    }
+    num_outputs *= current;
+  }
+  for (size_t idx = 0; idx < num_outputs; ++idx) {
+    output_data[idx] = T();
+    temp_sum[idx] = U();
+  }
+
+  // Resolve axis.
+  int num_resolved_axis = 0;
+  if (!ResolveAxis(input_num_dims, axis, num_axis_dimensions, resolved_axis,
+                   &num_resolved_axis)) {
+    return false;
+  }
+
+  if (!ReduceSumImpl<T, U>(input_data, input_dims, output_dims, input_num_dims,
+                           output_num_dims, resolved_axis, num_resolved_axis,
+                           temp_index, temp_sum)) {
+    return false;
+  }
+
+  // Calculate the mean by dividing output_data by the number of aggregated
+  // elements.
+  U num_elements_in_axis = 1;
+  for (int idx = 0; idx < num_resolved_axis; ++idx) {
+    size_t current = static_cast<size_t>(input_dims[resolved_axis[idx]]);
+    // Overflow prevention.
+    if (current > (std::numeric_limits<U>::max() / num_elements_in_axis)) {
+      return false;
+    }
+    num_elements_in_axis *= current;
+  }
+
+  if (num_elements_in_axis > 0) {
+    for (size_t idx = 0; idx < num_outputs; ++idx) {
+      output_data[idx] =
+          static_cast<T>(temp_sum[idx] / static_cast<U>(num_elements_in_axis));
+    }
+  }
+  return true;
+}
+
+template <typename T>
+inline void Mean(const tflite::MeanParams& op_params,
+                 const RuntimeShape& unextended_input_shape,
+                 const T* input_data,
+                 const RuntimeShape& unextended_output_shape, T* output_data) {
+  ruy::profiler::ScopeLabel label("Mean4D");
+
+  // The current implementation only supports 4D tensors and simultaneous
+  // reduction over width and height.
+  TFLITE_CHECK_EQ(unextended_input_shape.DimensionsCount(), 4);
+  TFLITE_CHECK_LE(unextended_output_shape.DimensionsCount(), 4);
+  const RuntimeShape input_shape =
+      RuntimeShape::ExtendedShape(4, unextended_input_shape);
+  const RuntimeShape output_shape =
+      RuntimeShape::ExtendedShape(4, unextended_output_shape);
+
+  const int output_batch = output_shape.Dims(0);
+  const int output_height = output_shape.Dims(1);
+  const int output_width = output_shape.Dims(2);
+  const int output_depth = output_shape.Dims(3);
+
+  const int input_height = input_shape.Dims(1);
+  const int input_width = input_shape.Dims(2);
+
+  TFLITE_CHECK_EQ(op_params.axis_count, 2);
+  TFLITE_CHECK((op_params.axis[0] == 1 && op_params.axis[1] == 2) ||
+               (op_params.axis[0] == 2 && op_params.axis[1] == 1));
+  TFLITE_CHECK_EQ(output_height, 1);
+  TFLITE_CHECK_EQ(output_width, 1);
+
+  for (int out_b = 0; out_b < output_batch; ++out_b) {
+    for (int out_d = 0; out_d < output_depth; ++out_d) {
+      float value = 0;
+      for (int in_h = 0; in_h < input_height; ++in_h) {
+        for (int in_w = 0; in_w < input_width; ++in_w) {
+          value += input_data[Offset(input_shape, out_b, in_h, in_w, out_d)];
+        }
+      }
+      output_data[Offset(output_shape, out_b, 0, 0, out_d)] =
+          value / (input_width * input_height);
+    }
+  }
+}
+
+inline void Mean(const tflite::MeanParams& op_params,
+                 const RuntimeShape& unextended_input_shape,
+                 const uint8_t* input_data, int32_t input_zero_point,
+                 float input_scale, const RuntimeShape& unextended_output_shape,
+                 uint8_t* output_data, int32_t output_zero_point,
+                 float output_scale) {
+  ruy::profiler::ScopeLabel label("Mean4D/Uint8");
+
+  // The current implementation only supports 4D tensors and simultaneous
+  // reduction over width and height.
+  TFLITE_CHECK_EQ(unextended_input_shape.DimensionsCount(), 4);
+  TFLITE_CHECK_LE(unextended_output_shape.DimensionsCount(), 4);
+  const RuntimeShape input_shape =
+      RuntimeShape::ExtendedShape(4, unextended_input_shape);
+  const RuntimeShape output_shape =
+      RuntimeShape::ExtendedShape(4, unextended_output_shape);
+  const int output_batch = output_shape.Dims(0);
+  const int output_height = output_shape.Dims(1);
+  const int output_width = output_shape.Dims(2);
+  const int output_depth = output_shape.Dims(3);
+  const int input_height = input_shape.Dims(1);
+  const int input_width = input_shape.Dims(2);
+  const float num_elements_in_axis = input_width * input_height;
+
+  TFLITE_CHECK_EQ(op_params.axis_count, 2);
+  TFLITE_CHECK((op_params.axis[0] == 1 && op_params.axis[1] == 2) ||
+               (op_params.axis[0] == 2 && op_params.axis[1] == 1));
+  TFLITE_CHECK_EQ(output_height, 1);
+  TFLITE_CHECK_EQ(output_width, 1);
+
+  constexpr int32_t kMinValue = std::numeric_limits<uint8_t>::min();
+  constexpr int32_t kMaxValue = std::numeric_limits<uint8_t>::max();
+
+  int32_t bias =
+      output_zero_point -
+      static_cast<int32_t>(input_zero_point * input_scale / output_scale);
+  double real_scale =
+      static_cast<double>(input_scale / (num_elements_in_axis * output_scale));
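+
+  // Derivation: the real-valued mean is
+  //   input_scale * (acc / num_elements_in_axis - input_zero_point)
+  // and its quantized value is mean / output_scale + output_zero_point, which
+  // factors into acc * real_scale plus the constant bias computed above.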
+
+  int32_t multiplier;
+  int shift;
+  QuantizeMultiplier(real_scale, &multiplier, &shift);
+  for (int out_b = 0; out_b < output_batch; ++out_b) {
+    for (int out_d = 0; out_d < output_depth; ++out_d) {
+      int32_t acc = 0;
+      for (int in_h = 0; in_h < input_height; ++in_h) {
+        for (int in_w = 0; in_w < input_width; ++in_w) {
+          acc += input_data[Offset(input_shape, out_b, in_h, in_w, out_d)];
+        }
+      }
+      acc = MultiplyByQuantizedMultiplier(acc, multiplier, shift);
+      acc += bias;
+      acc = std::min(std::max(acc, kMinValue), kMaxValue);
+      output_data[Offset(output_shape, out_b, 0, 0, out_d)] =
+          static_cast<uint8_t>(acc);
+    }
+  }
+}
+
+// Computes the mean of elements across dimensions given in axis.
+// It does so in two stages: first it calculates the sum of elements along the
+// axis, then divides it by the number of elements in the axis, for quantized
+// values.
+template <typename T, typename U>
+inline bool QuantizedMeanOrSum(const T* input_data, int32_t input_zero_point,
+                               float input_scale, const int* input_dims,
+                               const int input_num_dims, T* output_data,
+                               int32_t output_zero_point, float output_scale,
+                               const int* output_dims,
+                               const int output_num_dims, const int* axis,
+                               const int num_axis_dimensions, bool keep_dims,
+                               int* temp_index, int* resolved_axis, U* temp_sum,
+                               bool compute_sum) {
+  const bool uint8_case = std::is_same<T, uint8_t>::value;
+  const bool int16_case = std::is_same<T, int16_t>::value;
+  if (uint8_case) {
+    ruy::profiler::ScopeLabel label(compute_sum ? "Sum/Uint8" : "Mean/Uint8");
+  } else if (int16_case) {
+    ruy::profiler::ScopeLabel label(compute_sum ? "Sum/Int16" : "Mean/Int16");
+  } else {
+    ruy::profiler::ScopeLabel label(compute_sum ? "Sum/Int8" : "Mean/Int8");
+  }
+  // Reset output data.
+  size_t num_outputs = 1;
+  for (int idx = 0; idx < output_num_dims; ++idx) {
+    size_t current = static_cast<size_t>(output_dims[idx]);
+    // Overflow prevention.
+    if (num_outputs > std::numeric_limits<size_t>::max() / current) {
+      return false;
+    }
+    num_outputs *= current;
+  }
+  for (size_t idx = 0; idx < num_outputs; ++idx) {
+    output_data[idx] = T();
+    temp_sum[idx] = U();
+  }
+
+  // Resolve axis.
+  int num_resolved_axis = 0;
+  if (!ResolveAxis(input_num_dims, axis, num_axis_dimensions, resolved_axis,
+                   &num_resolved_axis)) {
+    return false;
+  }
+
+  if (!ReduceSumImpl<T, U>(input_data, input_dims, output_dims, input_num_dims,
+                           output_num_dims, resolved_axis, num_resolved_axis,
+                           temp_index, temp_sum)) {
+    return false;
+  }
+
+  // Calculate the mean by dividing output_data by the number of aggregated
+  // elements.
+  U num_elements_in_axis = 1;
+  for (int idx = 0; idx < num_resolved_axis; ++idx) {
+    size_t current = static_cast<size_t>(input_dims[resolved_axis[idx]]);
+    // Overflow prevention.
+    if (current > (std::numeric_limits<U>::max() / num_elements_in_axis)) {
+      return false;
+    }
+    num_elements_in_axis *= current;
+  }
+
+  if (num_elements_in_axis > 0) {
+    const float scale = input_scale / output_scale;
+    if (compute_sum) {
+      // TODO(b/116341117): Eliminate float and do this completely in 8bit.
+      const float bias =
+          -input_zero_point * scale * num_elements_in_axis + 0.5f;
+      for (size_t idx = 0; idx < num_outputs; ++idx) {
+        const U value =
+            static_cast<U>(TfLiteRound(temp_sum[idx] * scale + bias)) +
+            output_zero_point;
+        output_data[idx] = static_cast<T>(value);
+      }
+    } else {
+      const float bias = -input_zero_point * scale + 0.5f;
+      for (size_t idx = 0; idx < num_outputs; ++idx) {
+        float float_mean = static_cast<float>(temp_sum[idx]) /
+                           static_cast<float>(num_elements_in_axis);
+        float result = TfLiteMin(
+            TfLiteRound(float_mean * scale + bias) + output_zero_point,
+            static_cast<float>(std::numeric_limits<T>::max()));
+        result = TfLiteMax(result,
+                           static_cast<float>(std::numeric_limits<T>::min()));
+        output_data[idx] = static_cast<T>(result);
+      }
+    }
+  }
+  return true;
+}
+
+}  // namespace reference_ops
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REDUCE_H_

+ 67 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/requantize.h

@@ -0,0 +1,67 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REQUANTIZE_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REQUANTIZE_H_
+
+#include "ruy/profiler/instrumentation.h"  // from @ruy
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+namespace reference_ops {
+
+template <typename input_type, typename output_type>
+inline void Requantize(const input_type* input_data, int32_t size,
+                       int32_t effective_scale_multiplier,
+                       int32_t effective_scale_shift, int32_t input_zeropoint,
+                       int32_t output_zeropoint, output_type* output_data) {
+  ruy::profiler::ScopeLabel label("Requantize");
+  const bool same_scale =
+      (effective_scale_multiplier == 1 << 30 && effective_scale_shift == 1);
+  if (same_scale) {
+    const bool mixed_type_int8_uint8 =
+        std::is_same<input_type, int8_t>::value &&
+        std::is_same<output_type, uint8_t>::value;
+    const bool mixed_type_uint8_int8 =
+        std::is_same<input_type, uint8_t>::value &&
+        std::is_same<output_type, int8_t>::value;
+    const int32_t zero_point_diff = input_zeropoint - output_zeropoint;
+    // Fast path to do requantization for the case when just a shift of 128 is
+    // needed.
+    if ((mixed_type_int8_uint8 && zero_point_diff == -128) ||
+        (mixed_type_uint8_int8 && zero_point_diff == 128)) {
+      for (int i = 0; i < size; ++i) {
+        output_data[i] = input_data[i] ^ 0x80;
+      }
+      // Return early so the general path below does not recompute (and
+      // overwrite) the already-requantized output.
+      return;
+    }
+  }
+  static constexpr int32_t kMinOutput = std::numeric_limits<output_type>::min();
+  static constexpr int32_t kMaxOutput = std::numeric_limits<output_type>::max();
+  for (int i = 0; i < size; ++i) {
+    const int32_t input = input_data[i] - input_zeropoint;
+    const int32_t output =
+        MultiplyByQuantizedMultiplier(input, effective_scale_multiplier,
+                                      effective_scale_shift) +
+        output_zeropoint;
+    const int32_t clamped_output =
+        std::max(std::min(output, kMaxOutput), kMinOutput);
+    output_data[i] = static_cast<output_type>(clamped_output);
+  }
+}
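+
+// Note: a multiplier of 1 << 30 with a shift of 1 encodes an effective scale
+// of (2^30 / 2^31) * 2^1 = 1.0, which is why that pair marks the same-scale
+// fast path above; for int8_t -> uint8_t with a zero-point difference of
+// -128, requantization then reduces to flipping the sign bit (x ^ 0x80).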
+
+}  // namespace reference_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REQUANTIZE_H_

+ 101 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h

@@ -0,0 +1,101 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_RESIZE_NEAREST_NEIGHBOR_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_RESIZE_NEAREST_NEIGHBOR_H_
+
+#include <cmath>
+
+#include "tensorflow/lite/kernels/internal/cppmath.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+
+namespace reference_ops {
+
+inline int32_t GetNearestNeighbor(const int input_value,
+                                  const int32_t input_size,
+                                  const int32_t output_size,
+                                  const bool align_corners,
+                                  const bool half_pixel_centers) {
+  const float scale =
+      (align_corners && output_size > 1)
+          ? (input_size - 1) / static_cast<float>(output_size - 1)
+          : input_size / static_cast<float>(output_size);
+  const float offset = half_pixel_centers ? 0.5f : 0.0f;
+  int32_t output_value = std::min(
+      align_corners
+          ? static_cast<int32_t>(TfLiteRound((input_value + offset) * scale))
+          : static_cast<int32_t>(std::floor((input_value + offset) * scale)),
+      input_size - 1);
+  if (half_pixel_centers) {
+    output_value = std::max(static_cast<int32_t>(0), output_value);
+  }
+  return output_value;
+}
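+
+// Worked example: upsampling from input_size = 2 to output_size = 4 with
+// align_corners = false and half_pixel_centers = false gives scale = 0.5, so
+// output positions {0, 1, 2, 3} map to floor({0, 0.5, 1.0, 1.5}) =
+// input positions {0, 0, 1, 1}.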
+
+template <typename T>
+inline void ResizeNearestNeighbor(
+    const tflite::ResizeNearestNeighborParams& op_params,
+    const RuntimeShape& unextended_input_shape, const T* input_data,
+    const RuntimeShape& output_size_shape, const int32_t* output_size_data,
+    const RuntimeShape& unextended_output_shape, T* output_data) {
+  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
+
+  const RuntimeShape input_shape =
+      RuntimeShape::ExtendedShape(4, unextended_input_shape);
+  const RuntimeShape output_shape =
+      RuntimeShape::ExtendedShape(4, unextended_output_shape);
+
+  int32_t batches = MatchingDim(input_shape, 0, output_shape, 0);
+  int32_t input_height = input_shape.Dims(1);
+  int32_t input_width = input_shape.Dims(2);
+  int32_t depth = MatchingDim(input_shape, 3, output_shape, 3);
+
+  // The TensorFlow version of this op allows resizing on the width and height
+  // axes only.
+  TFLITE_DCHECK_EQ(output_size_shape.FlatSize(), 2);
+  int32_t output_height = output_size_data[0];
+  int32_t output_width = output_size_data[1];
+
+  const int col_offset = input_shape.Dims(3);
+  const int row_offset = input_shape.Dims(2) * col_offset;
+  const int batch_offset = input_shape.Dims(1) * row_offset;
+
+  const T* input_ptr = input_data;
+  T* output_ptr = output_data;
+  for (int b = 0; b < batches; ++b) {
+    for (int y = 0; y < output_height; ++y) {
+      int32_t in_y = GetNearestNeighbor(y, input_height, output_height,
+                                        op_params.align_corners,
+                                        op_params.half_pixel_centers);
+      const T* y_input_ptr = input_ptr + in_y * row_offset;
+      for (int x = 0; x < output_width; ++x) {
+        int32_t in_x = GetNearestNeighbor(x, input_width, output_width,
+                                          op_params.align_corners,
+                                          op_params.half_pixel_centers);
+        const T* x_input_ptr = y_input_ptr + in_x * col_offset;
+        memcpy(output_ptr, x_input_ptr, depth * sizeof(T));
+        output_ptr += depth;
+      }
+    }
+    input_ptr += batch_offset;
+  }
+}
+
+}  // namespace reference_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_RESIZE_NEAREST_NEIGHBOR_H_

+ 51 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/round.h

@@ -0,0 +1,51 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ROUND_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ROUND_H_
+
+#include <cmath>
+
+#include "tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+
+namespace reference_ops {
+
+inline float RoundToNearest(float value) {
+  auto floor_val = std::floor(value);
+  auto diff = value - floor_val;
+  if ((diff < 0.5f) ||
+      ((diff == 0.5f) && (static_cast<int>(floor_val) % 2 == 0))) {
+    return floor_val;
+  } else {
+    return floor_val + 1.0f;
+  }
+}
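+
+// Worked examples: RoundToNearest(0.5f) == 0.0f and
+// RoundToNearest(2.5f) == 2.0f (ties go to the even neighbor), while
+// RoundToNearest(1.5f) == 2.0f and RoundToNearest(1.4f) == 1.0f.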
+
+inline void Round(const RuntimeShape& input_shape, const float* input_data,
+                  const RuntimeShape& output_shape, float* output_data) {
+  const int flat_size = MatchingFlatSize(input_shape, output_shape);
+  for (int i = 0; i < flat_size; ++i) {
+    // Note that this implementation matches that of TensorFlow's tf.round
+    // and corresponds to the banker's rounding method.
+    // cfenv (for fesetround) is not yet supported universally on Android, so
+    // we use a workaround instead.
+    output_data[i] = RoundToNearest(input_data[i]);
+  }
+}
+
+}  // namespace reference_ops
+}  // namespace tflite
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ROUND_H_

+ 228 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/softmax.h

@@ -0,0 +1,228 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SOFTMAX_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SOFTMAX_H_
+
+#include <limits>
+#include <vector>
+
+#include "fixedpoint/fixedpoint.h"
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/cppmath.h"
+#include "tensorflow/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+#include "tensorflow/lite/kernels/op_macros.h"
+
+namespace tflite {
+namespace reference_ops {
+
+inline void Softmax(const SoftmaxParams& params,
+                    const RuntimeShape& input_shape, const float* input_data,
+                    const RuntimeShape& output_shape, float* output_data) {
+  const int trailing_dim = input_shape.DimensionsCount() - 1;
+  const int outer_size =
+      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
+  const int depth =
+      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
+
+  for (int i = 0; i < outer_size; ++i) {
+    // Find the max element value, which we use to ensure numerical stability
+    // by taking advantage of the following equality:
+    // exp(x[i])/sum(exp(x[i])) == exp(x[i]+C)/sum(exp(x[i]+C))
+    float max = std::numeric_limits<float>::lowest();
+    for (int c = 0; c < depth; ++c) {
+      max = std::max(max, input_data[i * depth + c]);
+    }
+
+    // Compute sum.
+    float sum = 0.f;
+    for (int c = 0; c < depth; ++c) {
+      sum += std::exp((input_data[i * depth + c] - max) *
+                      static_cast<float>(params.beta));
+    }
+
+    // Compute result.
+    for (int c = 0; c < depth; ++c) {
+      output_data[i * depth + c] = std::exp((input_data[i * depth + c] - max) *
+                                            static_cast<float>(params.beta)) /
+                                   sum;
+    }
+  }
+}
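+
+// Worked example: for a single row {1.f, 2.f, 3.f} with beta = 1, the max (3)
+// is subtracted first, so the output is
+// {e^-2, e^-1, e^0} / (e^-2 + e^-1 + e^0) ~= {0.090, 0.245, 0.665}.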
+
+// Quantized softmax with int8_t/uint8_t input and int8_t/uint8_t/int16_t
+// output.
+template <typename InputT, typename OutputT>
+inline void Softmax(const SoftmaxParams& params,
+                    const RuntimeShape& input_shape, const InputT* input_data,
+                    const RuntimeShape& output_shape, OutputT* output_data) {
+  const int32_t input_beta_multiplier = params.input_multiplier;
+  const int32_t input_beta_left_shift = params.input_left_shift;
+  const int diff_min = params.diff_min;
+  // The representation chosen for the input to the exp() function is Q5.26.
+  // We need to leave extra space since values that we skip might be as large as
+  // -32 before multiplying by input_beta_multiplier, and therefore as large as
+  // -16 afterwards.  Note that exp(-8) is definitely not insignificant to
+  // accumulation, but exp(-16) definitely is.
+  static const int kScaledDiffIntegerBits = 5;
+  static const int kAccumulationIntegerBits = 12;
+  using FixedPointScaledDiff =
+      gemmlowp::FixedPoint<int32_t, kScaledDiffIntegerBits>;
+  using FixedPointAccum =
+      gemmlowp::FixedPoint<int32_t, kAccumulationIntegerBits>;
+  using FixedPoint0 = gemmlowp::FixedPoint<int32_t, 0>;
+
+  const int trailing_dim = input_shape.DimensionsCount() - 1;
+  const int outer_size =
+      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
+  const int depth =
+      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
+
+  for (int i = 0; i < outer_size; ++i) {
+    InputT max_in_row = std::numeric_limits<InputT>::min();
+    for (int c = 0; c < depth; ++c) {
+      max_in_row = std::max(max_in_row, input_data[i * depth + c]);
+    }
+
+    FixedPointAccum sum_of_exps = FixedPointAccum::Zero();
+    for (int c = 0; c < depth; ++c) {
+      int32_t input_diff =
+          static_cast<int32_t>(input_data[i * depth + c]) - max_in_row;
+      if (input_diff >= diff_min) {
+        const int32_t input_diff_rescaled =
+            MultiplyByQuantizedMultiplierGreaterThanOne(
+                input_diff, input_beta_multiplier, input_beta_left_shift);
+        const FixedPointScaledDiff scaled_diff_f8 =
+            FixedPointScaledDiff::FromRaw(input_diff_rescaled);
+        sum_of_exps = sum_of_exps + gemmlowp::Rescale<kAccumulationIntegerBits>(
+                                        exp_on_negative_values(scaled_diff_f8));
+      }
+    }
+
+    int num_bits_over_unit;
+    FixedPoint0 shifted_scale = FixedPoint0::FromRaw(GetReciprocal(
+        sum_of_exps.raw(), kAccumulationIntegerBits, &num_bits_over_unit));
+
+    for (int c = 0; c < depth; ++c) {
+      int32_t input_diff =
+          static_cast<int32_t>(input_data[i * depth + c]) - max_in_row;
+      if (input_diff >= diff_min) {
+        const int32_t input_diff_rescaled =
+            MultiplyByQuantizedMultiplierGreaterThanOne(
+                input_diff, input_beta_multiplier, input_beta_left_shift);
+        const FixedPointScaledDiff scaled_diff_f8 =
+            FixedPointScaledDiff::FromRaw(input_diff_rescaled);
+
+        FixedPoint0 exp_in_0 = exp_on_negative_values(scaled_diff_f8);
+        int32_t unsat_output = gemmlowp::RoundingDivideByPOT(
+            (shifted_scale * exp_in_0).raw(),
+            num_bits_over_unit + 31 - (sizeof(OutputT) * 8));
+
+        const int32_t shifted_output =
+            unsat_output +
+            static_cast<int32_t>(std::numeric_limits<OutputT>::min());
+
+        output_data[i * depth + c] = static_cast<OutputT>(std::max(
+            std::min(shifted_output,
+                     static_cast<int32_t>(std::numeric_limits<OutputT>::max())),
+            static_cast<int32_t>(std::numeric_limits<OutputT>::min())));
+      } else {
+        output_data[i * depth + c] = std::numeric_limits<OutputT>::min();
+      }
+    }
+  }
+}
+
+// Quantized softmax with int16_t input and int16_t output.
+inline void SoftmaxInt16(const SoftmaxParams& params,
+                         const RuntimeShape& input_shape,
+                         const int16_t* input_data,
+                         const RuntimeShape& output_shape,
+                         int16_t* output_data) {
+  const int trailing_dim = input_shape.DimensionsCount() - 1;
+  const int outer_size =
+      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
+  const int depth =
+      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
+
+  for (int i = 0; i < outer_size; ++i) {
+    // Find the largest element
+    int16_t max_in_row = std::numeric_limits<int16_t>::min();
+    for (int c = 0; c < depth; ++c) {
+      max_in_row = std::max(max_in_row, input_data[i * depth + c]);
+    }
+
+    // Compute exp(input - max_input)
+    std::vector<int16_t> exp_result_Q015(depth);
+    for (int c = 0; c < depth; ++c) {
+      int32_t input_diff = input_data[i * depth + c] - max_in_row;
+      // scale the input_diff such that [-65535, 0] correspond to [-10.0, 0.0]
+      int32_t scaled_diff = MultiplyByQuantizedMultiplier(
+          input_diff, params.input_multiplier, params.input_left_shift);
+      // recenter to [-32768, 32767]
+      int32_t sym_scaled_diff = scaled_diff + 32767;
+      int16_t sat_sym_scaled_diff =
+          std::min(std::max(sym_scaled_diff, static_cast<int32_t>(-32768)),
+                   static_cast<int32_t>(32767));
+      // apply the exp() LUT activation function
+      exp_result_Q015[c] =
+          generic_int16_table_lookup(sat_sym_scaled_diff, params.exp_lut);
+    }
+
+    // sum_of_exps is a Q16.15 fixed point format.
+    int32_t sum_of_exps = 0;
+    for (int c = 0; c < depth; ++c) {
+      // Q16.15 + Q0.15
+      sum_of_exps += exp_result_Q015[c];
+    }
+
+    // Compute the reciprocal 1/sum_of_exps
+    uint8_t headroom_plus_one =
+        CountLeadingZeros(static_cast<uint32_t>(sum_of_exps));
+    int32_t shifted_sum =
+        ((static_cast<int64_t>(sum_of_exps) << (headroom_plus_one - 1)) +
+         (1 << 13)) >>
+        14;
+    // since the LUT computes 1/(1 + x) we need to first compute x = (sum - 1).
+    // also, the LUT expects a symmetrical input, so we must also recenter x
+    // from [0, 65535] to [-32768, 32767].
+    int32_t sym_shifted_sum = shifted_sum + (-((1 << 15) + (1 << 16)));
+    int16_t sat_sym_shifted_sum = static_cast<int16_t>(
+        std::min(std::max(sym_shifted_sum, static_cast<int32_t>(-32768)),
+                 static_cast<int32_t>(32767)));
+    // apply 1/(1 + x) LUT activation function
+    int16_t reciprocal_scale_Q015 = generic_int16_table_lookup(
+        sat_sym_shifted_sum, params.one_over_one_plus_x_lut);
+
+    // Rescale the exp_result with reciprocal
+    // range of output is [0, 32767] correspond to [0.0, 1.0]
+    for (int c = 0; c < depth; ++c) {
+      uint8_t right_shift = 31 - headroom_plus_one;
+      int64_t round = 1 << (right_shift - 1);
+      int32_t result = (static_cast<int64_t>(exp_result_Q015[c]) *
+                            static_cast<int64_t>(reciprocal_scale_Q015) +
+                        round) >>
+                       right_shift;
+      output_data[i * depth + c] = static_cast<int16_t>(
+          std::min(std::max(result, static_cast<int32_t>(0)),
+                   static_cast<int32_t>(32767)));
+    }
+  }
+}
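+
+// Note: SoftmaxInt16 works entirely in fixed point. exp() and 1/(1 + x) are
+// evaluated through the params.exp_lut and params.one_over_one_plus_x_lut
+// lookup tables, and the headroom (CountLeadingZeros) of the Q16.15 sum
+// determines the final right shift so the per-element rescale stays within
+// 32 bits.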
+
+}  // namespace reference_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SOFTMAX_H_

+ 94 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/strided_slice.h

@@ -0,0 +1,94 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_STRIDED_SLICE_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_STRIDED_SLICE_H_
+
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/compatibility.h"
+#include "tensorflow/lite/kernels/internal/strided_slice_logic.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+
+namespace reference_ops {
+template <typename T>
+inline void StridedSlice(const tflite::StridedSliceParams& op_params,
+                         const RuntimeShape& unextended_input_shape,
+                         const T* input_data,
+                         const RuntimeShape& unextended_output_shape,
+                         T* output_data) {
+  using strided_slice::LoopCondition;
+  using strided_slice::StartForAxis;
+  using strided_slice::StopForAxis;
+  // Note that the output_shape is not used herein.
+  tflite::StridedSliceParams params_copy = op_params;
+
+  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 5);
+  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 5);
+  const RuntimeShape input_shape =
+      RuntimeShape::ExtendedShape(5, unextended_input_shape);
+  const RuntimeShape output_shape =
+      RuntimeShape::ExtendedShape(5, unextended_output_shape);
+
+  // Reverse and pad to 5 dimensions because that is what the runtime code
+  // requires (i.e. all shapes must be 5D and are given backwards).
+  strided_slice::StridedSlicePadIndices(&params_copy, 5);
+
+  const int start_0 = StartForAxis(params_copy, input_shape, 0);
+  const int stop_0 = StopForAxis(params_copy, input_shape, 0, start_0);
+  const int start_1 = StartForAxis(params_copy, input_shape, 1);
+  const int stop_1 = StopForAxis(params_copy, input_shape, 1, start_1);
+  const int start_2 = StartForAxis(params_copy, input_shape, 2);
+  const int stop_2 = StopForAxis(params_copy, input_shape, 2, start_2);
+  const int start_3 = StartForAxis(params_copy, input_shape, 3);
+  const int stop_3 = StopForAxis(params_copy, input_shape, 3, start_3);
+  const int start_4 = StartForAxis(params_copy, input_shape, 4);
+  const int stop_4 = StopForAxis(params_copy, input_shape, 4, start_4);
+
+  T* out_ptr = output_data;
+  for (int offset_0 = start_0 * input_shape.Dims(1),
+           end_0 = stop_0 * input_shape.Dims(1),
+           step_0 = params_copy.strides[0] * input_shape.Dims(1);
+       !LoopCondition(offset_0, end_0, params_copy.strides[0]);
+       offset_0 += step_0) {
+    for (int offset_1 = (offset_0 + start_1) * input_shape.Dims(2),
+             end_1 = (offset_0 + stop_1) * input_shape.Dims(2),
+             step_1 = params_copy.strides[1] * input_shape.Dims(2);
+         !LoopCondition(offset_1, end_1, params_copy.strides[1]);
+         offset_1 += step_1) {
+      for (int offset_2 = (offset_1 + start_2) * input_shape.Dims(3),
+               end_2 = (offset_1 + stop_2) * input_shape.Dims(3),
+               step_2 = params_copy.strides[2] * input_shape.Dims(3);
+           !LoopCondition(offset_2, end_2, params_copy.strides[2]);
+           offset_2 += step_2) {
+        for (int offset_3 = (offset_2 + start_3) * input_shape.Dims(4),
+                 end_3 = (offset_2 + stop_3) * input_shape.Dims(4),
+                 step_3 = params_copy.strides[3] * input_shape.Dims(4);
+             !LoopCondition(offset_3, end_3, params_copy.strides[3]);
+             offset_3 += step_3) {
+          for (int offset_4 = offset_3 + start_4, end_4 = offset_3 + stop_4;
+               !LoopCondition(offset_4, end_4, params_copy.strides[4]);
+               offset_4 += params_copy.strides[4]) {
+            *out_ptr++ = input_data[offset_4];
+          }
+        }
+      }
+    }
+  }
+}
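+
+// Usage sketch (illustrative values): slicing the first two rows of a 4x4
+// tensor with stride 1 could be configured as
+//
+//   tflite::StridedSliceParams p = {};
+//   p.start_indices_count = p.stop_indices_count = p.strides_count = 2;
+//   p.start_indices[0] = 0; p.stop_indices[0] = 2; p.strides[0] = 1;
+//   p.start_indices[1] = 0; p.stop_indices[1] = 4; p.strides[1] = 1;
+//   StridedSlice<float>(p, RuntimeShape({4, 4}), input,
+//                       RuntimeShape({2, 4}), output);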
+}  // namespace reference_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_STRIDED_SLICE_H_

+ 516 - 0
tensorflow/lite/micro/tensorflow/lite/kernels/internal/reference/sub.h

@@ -0,0 +1,516 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SUB_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SUB_H_
+
+#include <stdint.h>
+
+#include <algorithm>
+#include <limits>
+
+#include "ruy/profiler/instrumentation.h"  // from @ruy
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/compatibility.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+
+namespace reference_ops {
+
+inline void SubNonBroadcast(const ArithmeticParams& params,
+                            const RuntimeShape& input1_shape,
+                            const float* input1_data,
+                            const RuntimeShape& input2_shape,
+                            const float* input2_data,
+                            const RuntimeShape& output_shape,
+                            float* output_data) {
+  const int flat_size =
+      MatchingElementsSize(input1_shape, input2_shape, output_shape);
+  for (int i = 0; i < flat_size; ++i) {
+    output_data[i] = ActivationFunctionWithMinMax(
+        input1_data[i] - input2_data[i], params.float_activation_min,
+        params.float_activation_max);
+  }
+}
+
+inline void SubNonBroadcast(const ArithmeticParams& params,
+                            const RuntimeShape& input1_shape,
+                            const int32_t* input1_data,
+                            const RuntimeShape& input2_shape,
+                            const int32_t* input2_data,
+                            const RuntimeShape& output_shape,
+                            int32_t* output_data) {
+  const int flat_size =
+      MatchingElementsSize(input1_shape, input2_shape, output_shape);
+  for (int i = 0; i < flat_size; ++i) {
+    output_data[i] = ActivationFunctionWithMinMax(
+        input1_data[i] - input2_data[i], params.quantized_activation_min,
+        params.quantized_activation_max);
+  }
+}
+
+// TODO(b/151345304): We can implement BroadcastSub on buffers of arbitrary
+// dimensionality if the runtime code does a single loop over one dimension
+// that handles broadcasting as the base case. The code generator would then
+// generate max(D1, D2) nested for loops.
+// TODO(b/151345101): BroadcastSub is intentionally duplicated from
+// reference_ops.h. Once an optimized version is implemented and NdArrayDesc<T>
+// is no longer referenced in this file, move NdArrayDesc<T> from types.h to
+// reference_ops.h.
+template <int N = 5>
+inline void BroadcastSubSlow(const ArithmeticParams& params,
+                             const RuntimeShape& input1_shape,
+                             const float* input1_data,
+                             const RuntimeShape& input2_shape,
+                             const float* input2_data,
+                             const RuntimeShape& output_shape,
+                             float* output_data) {
+  ruy::profiler::ScopeLabel label("BroadcastSubSlow/float");
+  TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N);
+  TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N);
+  TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N);
+  NdArrayDesc<N> desc1;
+  NdArrayDesc<N> desc2;
+  NdArrayDesc<N> output_desc;
+  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
+                                      &desc2);
+  CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc);
+
+  // In TensorFlow, the dimensions are canonically named (batch_number, row,
+  // col, channel), with extents (batches, height, width, depth), with the
+  // trailing dimension changing most rapidly (channels has the smallest stride,
+  // typically 1 element).
+  //
+  // In generated C code, we store arrays with the dimensions reversed. The
+  // first dimension has smallest stride.
+  //
+  // We name our variables by their TensorFlow convention, but generate C code
+  // nesting loops such that the innermost loop has the smallest stride for the
+  // best cache behavior.
+  auto sub_func = [&](int indexes[N]) {
+    output_data[SubscriptToIndex(output_desc, indexes)] =
+        ActivationFunctionWithMinMax(
+            input1_data[SubscriptToIndex(desc1, indexes)] -
+                input2_data[SubscriptToIndex(desc2, indexes)],
+            params.float_activation_min, params.float_activation_max);
+  };
+  NDOpsHelper<N>(output_desc, sub_func);
+}
+
+template <int N = 5>
+inline void BroadcastSubSlow(const ArithmeticParams& params,
+                             const RuntimeShape& input1_shape,
+                             const uint8_t* input1_data,
+                             const RuntimeShape& input2_shape,
+                             const uint8_t* input2_data,
+                             const RuntimeShape& output_shape,
+                             uint8_t* output_data) {
+  ruy::profiler::ScopeLabel label("BroadcastSubSlow/uint8_t");
+  TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N);
+  TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N);
+  TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N);
+  NdArrayDesc<N> desc1;
+  NdArrayDesc<N> desc2;
+  NdArrayDesc<N> output_desc;
+  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
+                                      &desc2);
+  CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc);
+
+  // In Tensorflow, the dimensions are canonically named (batch_number, row,
+  // col, channel), with extents (batches, height, width, depth), with the
+  // trailing dimension changing most rapidly (channels has the smallest stride,
+  // typically 1 element).
+  //
+  // In generated C code, we store arrays with the dimensions reversed. The
+  // first dimension has smallest stride.
+  //
+  // We name our variables by their Tensorflow convention, but generate C code
+  // nesting loops such that the innermost loop has the smallest stride for the
+  // best cache behavior.
+  auto sub_func = [&](int indexes[N]) {
+    const int32_t input1_val =
+        params.input1_offset + input1_data[SubscriptToIndex(desc1, indexes)];
+    const int32_t input2_val =
+        params.input2_offset + input2_data[SubscriptToIndex(desc2, indexes)];
+    const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
+    const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
+    const int32_t scaled_input1_val =
+        MultiplyByQuantizedMultiplierSmallerThanOneExp(
+            shifted_input1_val, params.input1_multiplier, params.input1_shift);
+    const int32_t scaled_input2_val =
+        MultiplyByQuantizedMultiplierSmallerThanOneExp(
+            shifted_input2_val, params.input2_multiplier, params.input2_shift);
+    const int32_t raw_sub = scaled_input1_val - scaled_input2_val;
+    const int32_t raw_output =
+        MultiplyByQuantizedMultiplierSmallerThanOneExp(
+            raw_sub, params.output_multiplier, params.output_shift) +
+        params.output_offset;
+    const int32_t clamped_output =
+        std::min(params.quantized_activation_max,
+                 std::max(params.quantized_activation_min, raw_output));
+    output_data[SubscriptToIndex(output_desc, indexes)] =
+        static_cast<uint8_t>(clamped_output);
+  };
+  NDOpsHelper<N>(output_desc, sub_func);
+}
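
The uint8_t path performs the subtraction in the quantized domain. With the usual affine convention real ≈ scale · (q − zero_point), and writing $o_i$ for the offsets (negated zero points), $M_i$ for the fixed-point multipliers, and $s$ for `left_shift`, the lambda above computes, up to the exact rounding inside `MultiplyByQuantizedMultiplierSmallerThanOneExp`:

$$
q_{\text{out}} = \operatorname{clamp}\!\Big( M_{\text{out}}\big( M_1\,(q_1 + o_1)\,2^{s} - M_2\,(q_2 + o_2)\,2^{s} \big) + o_{\text{out}},\; q_{\min},\; q_{\max} \Big)
$$

The shared left shift by $2^s$ buys precision headroom before the two inputs are rescaled onto a common grid; the final multiplier/shift pair then maps the raw difference onto the output scale.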
+
+template <int N = 5>
+inline void BroadcastSubSlow(const ArithmeticParams& params,
+                             const RuntimeShape& input1_shape,
+                             const int32_t* input1_data,
+                             const RuntimeShape& input2_shape,
+                             const int32_t* input2_data,
+                             const RuntimeShape& output_shape,
+                             int32_t* output_data) {
+  ruy::profiler::ScopeLabel label("BroadcastSubSlow/int32_t");
+  TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N);
+  TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N);
+  TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N);
+  NdArrayDesc<N> desc1;
+  NdArrayDesc<N> desc2;
+  NdArrayDesc<N> output_desc;
+  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
+                                      &desc2);
+  CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc);
+
+  // In Tensorflow, the dimensions are canonically named (batch_number, row,
+  // col, channel), with extents (batches, height, width, depth), with the
+  // trailing dimension changing most rapidly (channels has the smallest stride,
+  // typically 1 element).
+  //
+  // In generated C code, we store arrays with the dimensions reversed. The
+  // first dimension has smallest stride.
+  //
+  // We name our variables by their Tensorflow convention, but generate C code
+  // nesting loops such that the innermost loop has the smallest stride for the
+  // best cache behavior.
+  auto sub_func = [&](int indexes[N]) {
+    output_data[SubscriptToIndex(output_desc, indexes)] =
+        ActivationFunctionWithMinMax(
+            input1_data[SubscriptToIndex(desc1, indexes)] -
+                input2_data[SubscriptToIndex(desc2, indexes)],
+            params.quantized_activation_min, params.quantized_activation_max);
+  };
+  NDOpsHelper<N>(output_desc, sub_func);
+}
+
+template <int N = 5>
+inline void BroadcastSubSlow(const ArithmeticParams& params,
+                             const RuntimeShape& input1_shape,
+                             const int8_t* input1_data,
+                             const RuntimeShape& input2_shape,
+                             const int8_t* input2_data,
+                             const RuntimeShape& output_shape,
+                             int8_t* output_data) {
+  ruy::profiler::ScopeLabel label("BroadcastSubSlow/int8_t");
+  TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N);
+  TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N);
+  TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N);
+  NdArrayDesc<N> desc1;
+  NdArrayDesc<N> desc2;
+  NdArrayDesc<N> output_desc;
+  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
+                                      &desc2);
+  CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc);
+
+  // In Tensorflow, the dimensions are canonically named (batch_number, row,
+  // col, channel), with extents (batches, height, width, depth), with the
+  // trailing dimension changing most rapidly (channels has the smallest stride,
+  // typically 1 element).
+  //
+  // In generated C code, we store arrays with the dimensions reversed. The
+  // first dimension has smallest stride.
+  //
+  // We name our variables by their Tensorflow convention, but generate C code
+  // nesting loops such that the innermost loop has the smallest stride for the
+  // best cache behavior.
+  auto sub_func = [&](int indexes[N]) {
+    const int32_t input1_val =
+        params.input1_offset + input1_data[SubscriptToIndex(desc1, indexes)];
+    const int32_t input2_val =
+        params.input2_offset + input2_data[SubscriptToIndex(desc2, indexes)];
+    const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
+    const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
+    const int32_t scaled_input1_val =
+        MultiplyByQuantizedMultiplierSmallerThanOneExp(
+            shifted_input1_val, params.input1_multiplier, params.input1_shift);
+    const int32_t scaled_input2_val =
+        MultiplyByQuantizedMultiplierSmallerThanOneExp(
+            shifted_input2_val, params.input2_multiplier, params.input2_shift);
+    const int32_t raw_sub = scaled_input1_val - scaled_input2_val;
+    const int32_t raw_output =
+        MultiplyByQuantizedMultiplierSmallerThanOneExp(
+            raw_sub, params.output_multiplier, params.output_shift) +
+        params.output_offset;
+    const int32_t clamped_output =
+        std::min(params.quantized_activation_max,
+                 std::max(params.quantized_activation_min, raw_output));
+    output_data[SubscriptToIndex(output_desc, indexes)] =
+        static_cast<int8_t>(clamped_output);
+  };
+  NDOpsHelper<N>(output_desc, sub_func);
+}
+
+template <int N = 5>
+void BroadcastSubSlow(const ArithmeticParams& params,
+                      const RuntimeShape& input1_shape,
+                      const int64_t* input1_data,
+                      const RuntimeShape& input2_shape,
+                      const int64_t* input2_data,
+                      const RuntimeShape& output_shape, int64_t* output_data) {
+  ruy::profiler::ScopeLabel label("BroadcastSubSlow/int64_t");
+  TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N);
+  TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N);
+  TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N);
+  NdArrayDesc<N> desc1;
+  NdArrayDesc<N> desc2;
+  NdArrayDesc<N> output_desc;
+  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
+                                      &desc2);
+  CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc);
+
+  // In Tensorflow, the dimensions are canonically named (batch_number, row,
+  // col, channel), with extents (batches, height, width, depth), with the
+  // trailing dimension changing most rapidly (channels has the smallest stride,
+  // typically 1 element).
+  //
+  // In generated C code, we store arrays with the dimensions reversed. The
+  // first dimension has smallest stride.
+  //
+  // We name our variables by their Tensorflow convention, but generate C code
+  // nesting loops such that the innermost loop has the smallest stride for the
+  // best cache behavior.
+  auto sub_func = [&](int indexes[N]) {
+    output_data[SubscriptToIndex(output_desc, indexes)] =
+        ActivationFunctionWithMinMax(
+            input1_data[SubscriptToIndex(desc1, indexes)] -
+                input2_data[SubscriptToIndex(desc2, indexes)],
+            params.int64_activation_min, params.int64_activation_max);
+  };
+  NDOpsHelper<N>(output_desc, sub_func);
+}
+
+template <typename T, int N = 5>
+void BroadcastSubSlow(const ArithmeticParams& params,
+                      const RuntimeShape& input1_shape, const T* input1_data,
+                      const RuntimeShape& input2_shape, const T* input2_data,
+                      const RuntimeShape& output_shape, T* output_data) {
+  ruy::profiler::ScopeLabel label("BroadcastSubSlow/templated");
+  TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N);
+  TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N);
+  TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N);
+  NdArrayDesc<N> desc1;
+  NdArrayDesc<N> desc2;
+  NdArrayDesc<N> output_desc;
+  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
+                                      &desc2);
+  CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc);
+
+  // In Tensorflow, the dimensions are canonically named (batch_number, row,
+  // col, channel), with extents (batches, height, width, depth), with the
+  // trailing dimension changing most rapidly (channels has the smallest stride,
+  // typically 1 element).
+  //
+  // In generated C code, we store arrays with the dimensions reversed. The
+  // first dimension has smallest stride.
+  //
+  // We name our variables by their Tensorflow convention, but generate C code
+  // nesting loops such that the innermost loop has the smallest stride for the
+  // best cache behavior.
+  auto sub_func = [&](int indexes[N]) {
+    output_data[SubscriptToIndex(output_desc, indexes)] =
+        ActivationFunctionWithMinMax(
+            input1_data[SubscriptToIndex(desc1, indexes)] -
+                input2_data[SubscriptToIndex(desc2, indexes)],
+            params.quantized_activation_min, params.quantized_activation_max);
+  };
+  NDOpsHelper<N>(output_desc, sub_func);
+}
+
+// Element-wise Sub that can often be used for inner loop of broadcast sub as
+// well as the non-broadcast sub.
+inline void SubElementwise(int size, const ArithmeticParams& params,
+                           const uint8_t* input1_data,
+                           const uint8_t* input2_data, uint8_t* output_data) {
+  TFLITE_DCHECK_GT(params.input1_offset, -256);
+  TFLITE_DCHECK_GT(params.input2_offset, -256);
+  TFLITE_DCHECK_LT(params.input1_offset, 256);
+  TFLITE_DCHECK_LT(params.input2_offset, 256);
+
+  for (int i = 0; i < size; ++i) {
+    const int32_t input1_val = params.input1_offset + input1_data[i];
+    const int32_t input2_val = params.input2_offset + input2_data[i];
+    const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
+    const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
+    const int32_t scaled_input1_val =
+        MultiplyByQuantizedMultiplierSmallerThanOneExp(
+            shifted_input1_val, params.input1_multiplier, params.input1_shift);
+    const int32_t scaled_input2_val =
+        MultiplyByQuantizedMultiplierSmallerThanOneExp(
+            shifted_input2_val, params.input2_multiplier, params.input2_shift);
+    const int32_t raw_sub = scaled_input1_val - scaled_input2_val;
+    const int32_t raw_output =
+        MultiplyByQuantizedMultiplierSmallerThanOneExp(
+            raw_sub, params.output_multiplier, params.output_shift) +
+        params.output_offset;
+    const int32_t clamped_output =
+        std::min(params.quantized_activation_max,
+                 std::max(params.quantized_activation_min, raw_output));
+    output_data[i] = static_cast<uint8_t>(clamped_output);
+  }
+}
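
The offset DCHECKs hold because each offset is a negated uint8 zero point, so it must lie in (−256, 256). The only non-obvious step in the loop is the fixed-point multiply; below is a simplified, hypothetical stand-in that shows its intent (result ≈ x · multiplier/2³¹ · 2^exponent, for exponent ≤ 0). The real implementation (from gemmlowp) additionally saturates the doubling high-multiply; that corner case is omitted here for clarity.

```c++
#include <cstdint>

// Simplified, hypothetical stand-in for
// MultiplyByQuantizedMultiplierSmallerThanOneExp (assumes exponent <= 0):
// result ~= x * (quantized_multiplier / 2^31) * 2^exponent.
int32_t ToyFixedPointMul(int32_t x, int32_t quantized_multiplier,
                         int exponent) {
  // High 32 bits of 2 * x * multiplier, rounded: ~ x * multiplier / 2^31.
  const int64_t prod = 2 * static_cast<int64_t>(x) * quantized_multiplier;
  const int32_t high = static_cast<int32_t>((prod + (1LL << 30)) >> 31);
  // Rounding right shift by -exponent.
  const int shift = -exponent;
  const int32_t rounding = (shift > 0) ? (1 << (shift - 1)) : 0;
  return (high + rounding) >> shift;
}
```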
+
+// Element-wise Sub that can often be used for inner loop of broadcast sub as
+// well as the non-broadcast sub.
+inline void SubElementwise(int size, const ArithmeticParams& params,
+                           const int8_t* input1_data, const int8_t* input2_data,
+                           int8_t* output_data) {
+  const int32_t int8_max_value = std::numeric_limits<int8_t>::max();
+  TFLITE_DCHECK_GE(params.input1_offset, -1 * int8_max_value);
+  TFLITE_DCHECK_GE(params.input2_offset, -1 * int8_max_value);
+  TFLITE_DCHECK_LE(params.input1_offset, int8_max_value);
+  TFLITE_DCHECK_LE(params.input2_offset, int8_max_value);
+
+  for (int i = 0; i < size; ++i) {
+    const int32_t input1_val = params.input1_offset + input1_data[i];
+    const int32_t input2_val = params.input2_offset + input2_data[i];
+    const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
+    const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
+    const int32_t scaled_input1_val =
+        MultiplyByQuantizedMultiplierSmallerThanOneExp(
+            shifted_input1_val, params.input1_multiplier, params.input1_shift);
+    const int32_t scaled_input2_val =
+        MultiplyByQuantizedMultiplierSmallerThanOneExp(
+            shifted_input2_val, params.input2_multiplier, params.input2_shift);
+    const int32_t raw_sub = scaled_input1_val - scaled_input2_val;
+    const int32_t raw_output =
+        MultiplyByQuantizedMultiplierSmallerThanOneExp(
+            raw_sub, params.output_multiplier, params.output_shift) +
+        params.output_offset;
+    const int32_t clamped_output =
+        std::min(params.quantized_activation_max,
+                 std::max(params.quantized_activation_min, raw_output));
+    output_data[i] = static_cast<int8_t>(clamped_output);
+  }
+}
+
+inline void Sub(const ArithmeticParams& params,
+                const RuntimeShape& input1_shape, const uint8_t* input1_data,
+                const RuntimeShape& input2_shape, const uint8_t* input2_data,
+                const RuntimeShape& output_shape, uint8_t* output_data) {
+  TFLITE_DCHECK_LE(params.quantized_activation_min,
+                   params.quantized_activation_max);
+  const int flat_size =
+      MatchingElementsSize(input1_shape, input2_shape, output_shape);
+
+  TFLITE_DCHECK_GT(params.input1_offset, -256);
+  TFLITE_DCHECK_GT(params.input2_offset, -256);
+  TFLITE_DCHECK_LT(params.input1_offset, 256);
+  TFLITE_DCHECK_LT(params.input2_offset, 256);
+  SubElementwise(flat_size, params, input1_data, input2_data, output_data);
+}
+
+inline void Sub(const ArithmeticParams& params,
+                const RuntimeShape& input1_shape, const int8_t* input1_data,
+                const RuntimeShape& input2_shape, const int8_t* input2_data,
+                const RuntimeShape& output_shape, int8_t* output_data) {
+  TFLITE_DCHECK_LE(params.quantized_activation_min,
+                   params.quantized_activation_max);
+
+  const int flat_size =
+      MatchingElementsSize(input1_shape, input2_shape, output_shape);
+
+  const int32_t int8_max_value = std::numeric_limits<int8_t>::max();
+  TFLITE_DCHECK_GE(params.input1_offset, -1 * int8_max_value);
+  TFLITE_DCHECK_GE(params.input2_offset, -1 * int8_max_value);
+  TFLITE_DCHECK_LE(params.input1_offset, int8_max_value);
+  TFLITE_DCHECK_LE(params.input2_offset, int8_max_value);
+  SubElementwise(flat_size, params, input1_data, input2_data, output_data);
+}
+
+template <typename T>
+void Sub(const ArithmeticParams& params, const RuntimeShape& input1_shape,
+         const T* input1_data, const RuntimeShape& input2_shape,
+         const T* input2_data, const RuntimeShape& output_shape,
+         T* output_data) {
+  NdArrayDesc<4> desc1;
+  NdArrayDesc<4> desc2;
+  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
+                                      &desc2);
+  const RuntimeShape extended_output_shape =
+      RuntimeShape::ExtendedShape(4, output_shape);
+
+  // In Tensorflow, the dimensions are canonically named (batch_number, row,
+  // col, channel), with extents (batches, height, width, depth), with the
+  // trailing dimension changing most rapidly (channels has the smallest stride,
+  // typically 1 element).
+  //
+  // In generated C code, we store arrays with the dimensions reversed. The
+  // first dimension has smallest stride.
+  //
+  // We name our variables by their Tensorflow convention, but generate C code
+  // nesting loops such that the innermost loop has the smallest stride for the
+  // best cache behavior.
+  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
+    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
+      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
+        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
+          output_data[Offset(extended_output_shape, b, y, x, c)] =
+              input1_data[SubscriptToIndex(desc1, b, y, x, c)] -
+              input2_data[SubscriptToIndex(desc2, b, y, x, c)];
+        }
+      }
+    }
+  }
+}
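
A usage sketch for the templated broadcast `Sub` above (assuming the TFLite Micro headers are on the include path; note this overload applies no activation clamping): subtracting a per-channel (1,1,1,3) vector from a (1,2,2,3) NHWC tensor.

```c++
#include "tensorflow/lite/kernels/internal/reference/sub.h"
#include "tensorflow/lite/kernels/internal/types.h"

int main() {
  const tflite::RuntimeShape in1_shape({1, 2, 2, 3});
  const tflite::RuntimeShape in2_shape({1, 1, 1, 3});
  float in1[12];
  for (int i = 0; i < 12; ++i) in1[i] = static_cast<float>(i);
  const float in2[3] = {0.0f, 10.0f, 20.0f};
  float out[12];
  tflite::ArithmeticParams params;  // activation fields unused by this overload
  tflite::reference_ops::Sub(params, in1_shape, in1, in2_shape, in2,
                             in1_shape, out);
  // out[i] = in1[i] - in2[i % 3]; the size-1 batch/height/width axes of
  // input2 broadcast across the corresponding axes of input1.
  return 0;
}
```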
+
+inline void SetActivationMinMax(const ArithmeticParams& params,
+                                int32_t* activation_min,
+                                int32_t* activation_max) {
+  *activation_min = params.quantized_activation_min;
+  *activation_max = params.quantized_activation_max;
+}
+
+inline void SetActivationMinMax(const ArithmeticParams& params,
+                                float* activation_min, float* activation_max) {
+  *activation_min = params.float_activation_min;
+  *activation_max = params.float_activation_max;
+}
+
+inline void SetActivationMinMax(const ArithmeticParams& params,
+                                int64_t* activation_min,
+                                int64_t* activation_max) {
+  *activation_min = params.int64_activation_min;
+  *activation_max = params.int64_activation_max;
+}
+
+template <typename T>
+inline void SubWithActivation(
+    const ArithmeticParams& params, const RuntimeShape& input1_shape,
+    const T* input1_data, const RuntimeShape& input2_shape,
+    const T* input2_data, const RuntimeShape& output_shape, T* output_data) {
+  ruy::profiler::ScopeLabel label("SubWithActivation");
+  const int flat_size =
+      MatchingElementsSize(input1_shape, input2_shape, output_shape);
+  T activation_min, activation_max;
+  SetActivationMinMax(params, &activation_min, &activation_max);
+
+  for (int i = 0; i < flat_size; ++i) {
+    output_data[i] = ActivationFunctionWithMinMax(
+        input1_data[i] - input2_data[i], activation_min, activation_max);
+  }
+}
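
`SubWithActivation` relies on the `SetActivationMinMax` overloads above to pick the clamp bounds matching the element type. A minimal float usage sketch (same header assumptions as the previous example):

```c++
#include "tensorflow/lite/kernels/internal/reference/sub.h"
#include "tensorflow/lite/kernels/internal/types.h"

int main() {
  const tflite::RuntimeShape shape({1, 1, 2, 2});
  const float in1[] = {1.0f, 2.0f, 3.0f, 4.0f};
  const float in2[] = {0.5f, 0.5f, 5.0f, 5.0f};
  float out[4];
  tflite::ArithmeticParams params;
  params.float_activation_min = 0.0f;  // ReLU-style lower bound
  params.float_activation_max = 6.0f;  // ReLU6-style upper bound
  tflite::reference_ops::SubWithActivation(params, shape, in1, shape, in2,
                                           shape, out);
  // out == {0.5, 1.5, 0.0, 0.0}: raw differences clamped into [0, 6].
  return 0;
}
```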
+
+}  // namespace reference_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SUB_H_

Some files were not shown because too many files changed in this diff