conv_settings.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424
  1. # SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
  2. #
  3. # SPDX-License-Identifier: Apache-2.0
  4. #
  5. # Licensed under the Apache License, Version 2.0 (the License); you may
  6. # not use this file except in compliance with the License.
  7. # You may obtain a copy of the License at
  8. #
  9. # www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an AS IS BASIS, WITHOUT
  13. # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. #
  17. from test_settings import TestSettings
  18. import tensorflow as tf
  19. import numpy as np
  20. import math
  21. import tf_keras as keras
  22. class ConvSettings(TestSettings):
  23. def __init__(self,
  24. dataset,
  25. testtype,
  26. regenerate_weights,
  27. regenerate_input,
  28. regenerate_biases,
  29. schema_file,
  30. in_ch=1,
  31. out_ch=1,
  32. x_in=7,
  33. y_in=7,
  34. w_x=3,
  35. w_y=3,
  36. stride_x=2,
  37. stride_y=2,
  38. groups=1,
  39. pad=True,
  40. randmin=TestSettings.INT8_MIN,
  41. randmax=TestSettings.INT8_MAX,
  42. batches=1,
  43. generate_bias=True,
  44. relu6=False,
  45. out_activation_min=None,
  46. out_activation_max=None,
  47. int16xint8=False,
  48. bias_min=TestSettings.INT32_MIN,
  49. bias_max=TestSettings.INT32_MAX,
  50. dilation_x=1,
  51. dilation_y=1,
  52. interpreter="tensorflow",
  53. int4_weights=False):
  54. super().__init__(dataset,
  55. testtype,
  56. regenerate_weights,
  57. regenerate_input,
  58. regenerate_biases,
  59. schema_file,
  60. in_ch,
  61. out_ch,
  62. x_in,
  63. y_in,
  64. w_x,
  65. w_y,
  66. stride_x,
  67. stride_y,
  68. pad,
  69. randmin,
  70. randmax,
  71. batches,
  72. generate_bias=generate_bias,
  73. relu6=relu6,
  74. out_activation_min=out_activation_min,
  75. out_activation_max=out_activation_max,
  76. int16xint8=int16xint8,
  77. bias_min=bias_min,
  78. bias_max=bias_max,
  79. dilation_x=dilation_x,
  80. dilation_y=dilation_y,
  81. interpreter=interpreter,
  82. int4_weights=int4_weights)
  83. self.scaling_factors = []
  84. self.groups = groups
  85. if self.test_type == 'depthwise_conv':
  86. self.channel_multiplier = self.output_ch // self.input_ch
  87. if self.output_ch % self.input_ch != 0:
  88. raise RuntimeError("out channel ({}) is not multiple of in channel ({})".format(out_ch, in_ch))
  89. if groups != 1:
  90. raise RuntimeError("ERROR: Groups cannot be used for depthwise convolution")
  91. else:
  92. self.channel_multiplier = 0
  93. self.filter_ch = in_ch // groups
  94. if in_ch % groups != 0:
  95. raise RuntimeError("ERROR: Input channels {} must be an even multiple of groups {}".format(in_ch, groups))
  96. if out_ch % groups != 0:
  97. raise RuntimeError("ERROR: Output channels {} must be an even multiple of groups {}".format(out_ch, groups))
  98. if self.int4_weights:
  99. if self.test_type == 'conv':
  100. self.json_template = "TestCases/Common/conv2d_s4_weights_template.json"
  101. elif self.test_type == 'depthwise_conv':
  102. self.json_template = "TestCases/Common/dw_s4_weights_template.json"
  103. def write_c_config_header(self) -> None:
  104. super().write_c_config_header()
  105. filename = self.config_data
  106. filepath = self.headers_dir + filename
  107. prefix = self.testdataset.upper()
  108. with open(filepath, "a") as f:
  109. self.write_common_config(f, prefix)
  110. if self.test_type == 'depthwise_conv':
  111. f.write("#define {}_CH_MULT {}\n".format(prefix, self.channel_multiplier))
  112. f.write("#define {}_INPUT_OFFSET {}\n".format(prefix, -self.input_zero_point))
  113. f.write("#define {}_OUTPUT_OFFSET {}\n".format(prefix, self.output_zero_point))
  114. f.write("#define {}_DILATION_X {}\n".format(prefix, self.dilation_x))
  115. f.write("#define {}_DILATION_Y {}\n".format(prefix, self.dilation_y))
  116. if self.groups != 1:
  117. f.write("#define {}_FILTER_CH {}\n".format(prefix, self.filter_ch))
  118. if self.test_type == 'transpose_conv':
  119. f.write("#define {}_PAD_X_WITH_OFFSET {}\n".format(prefix, self.pad_x_with_offset))
  120. f.write("#define {}_PAD_Y_WITH_OFFSET {}\n".format(prefix, self.pad_y_with_offset))
  121. def generate_quantize_per_channel_multiplier(self):
  122. num_channels = self.output_ch
  123. per_channel_multiplier = []
  124. per_channel_shift = []
  125. if len(self.scaling_factors) != num_channels:
  126. raise RuntimeError("Missing scaling factors")
  127. for i in range(num_channels):
  128. effective_output_scale = self.input_scale * self.scaling_factors[i] / self.output_scale
  129. (quantized_multiplier, shift) = self.quantize_scale(effective_output_scale)
  130. per_channel_multiplier.append(quantized_multiplier)
  131. per_channel_shift.append(shift)
  132. return per_channel_multiplier, per_channel_shift
  133. def generate_int4_scale(self, scale, shift, input_scale):
  134. self.output_scale = scale
  135. self.output_zp = shift
  136. self.input_scale = input_scale
  137. self.scaling_factors = np.random.uniform(0.001, 0.01, [self.output_ch]).tolist()
  138. per_channel_multiplier, per_channel_shift = self.generate_quantize_per_channel_multiplier()
  139. while any((x > 31 or x < -31) for x in per_channel_shift):
  140. self.output_scale = self.output_scale / 10
  141. per_channel_multiplier, per_channel_shift = self.generate_quantize_per_channel_multiplier()
  142. return self.output_scale, self.output_zp
  143. # TODO
  144. def quantize_float_data(self, data=None, quantization_bit_range=8, quantization_type="affine", tf_tensor=False):
  145. if data is not None:
  146. if tf_tensor:
  147. data = data.numpy()
  148. data_max = np.amax(data)
  149. data_min = np.amin(data)
  150. if quantization_type.lower() == "affine":
  151. data_min = min(data_min, 0.0)
  152. data_max = max(data_max, 0.0)
  153. scale = (data_max - data_min) / (pow(2, quantization_bit_range) - 1)
  154. zero_point = -(round(data_max * scale)) - pow(2, quantization_bit_range - 1)
  155. zero_point = max(zero_point, pow(quantization_bit_range - 1) - 1)
  156. zero_point = min(zero_point, -pow(quantization_bit_range - 1))
  157. elif quantization_type.lower() == "symmetric":
  158. absolute_max = max(abs(data_min), abs(data_max))
  159. scale = absolute_max / (pow(2, quantization_bit_range - 1) - 1)
  160. zero_point = 0
  161. else:
  162. raise RuntimeError("Quantization scheme not supported")
  163. scale = 0.1 if scale == 0 else scale
  164. quantized_data = [(x // scale) + zero_point for x in data]
  165. return tf.convert_to_tensor(quantized_data), scale, zero_point
    def generate_data(self, input_data=None, weights=None, biases=None) -> None:
        """Generate the quantized reference data and C arrays for this test case.

        Builds either a flatbuffer-json based model (int4 weights) or a one-layer
        Keras model (int8/int16), runs it through the TFLite interpreter and writes
        the input/weight/bias/output C arrays plus the config headers.
        Any of input_data/weights/biases left as None is randomly generated.
        """
        if self.is_int16xint8:
            inttype = tf.int16
            datatype = "int16_t"
            bias_datatype = "int64_t"
        else:
            inttype = tf.int8
            datatype = "int8_t"
            bias_datatype = "int32_t"

        input_data = self.get_randomized_input_data(input_data)
        biases = self.get_randomized_bias_data(biases)

        # Per-filter output channel count: full output for (transpose_)conv,
        # channel multiplier for depthwise.
        if self.test_type == 'conv' or self.test_type == 'transpose_conv':
            out_channel = self.output_ch
        elif self.test_type == 'depthwise_conv':
            out_channel = self.channel_multiplier

        if self.int4_weights:
            # int4 path: weights are generated flat, packed two nibbles per byte
            # and injected into a flatbuffer json template (no Keras model).
            w_shape = [self.filter_y * self.filter_x * self.input_ch * out_channel]
            if weights is not None:
                weights = tf.reshape(weights, w_shape)
            else:
                weights = self.get_randomized_data(w_shape,
                                                   self.kernel_table_file,
                                                   minrange=TestSettings.INT4_MIN,
                                                   maxrange=TestSettings.INT4_MAX,
                                                   decimals=1,
                                                   regenerate=self.regenerate_new_weights)
            # Fixed input quantization used by the json templates.
            input_scale = 0.046774
            input_zp = -128

            # Pad with a zero nibble so the weight count is even before packing.
            if w_shape[0] % 2:
                weights = np.append(weights, [0])

            if self.test_type == 'depthwise_conv':
                bias_scale = [64751.269531] * self.output_ch
                bias_zp = [0] * self.output_ch
                if self.generate_bias:
                    output_scale, output_zp = self.generate_int4_scale(4684910.0, -2, input_scale)
                else:
                    output_scale = 0.525255
                    output_zp = 2
            else:
                # Conv: quantize the float biases symmetrically to 8 bit.
                quant_bias, bias_scale, bias_zp = self.quantize_float_data(
                    biases, quantization_bit_range=8, quantization_type="symmetric", tf_tensor=not self.generate_bias)
                bias_scale = [bias_scale] * self.output_ch
                bias_zp = [bias_zp] * self.output_ch
                output_scale = np.random.uniform(0.02, 0.06)
                output_zp = 0

            scaling_factors = np.random.uniform(0.001, 0.01, [self.output_ch]).tolist()
            w_zp = [0] * self.output_ch

            # Compute output spatial dims ("same" vs "valid" padding).
            if self.has_padding:
                # TODO dilation with padding
                output_x = math.ceil(float(self.x_input) / float(self.stride_x))
                output_y = math.ceil(float(self.y_input) / float(self.stride_y))
            else:
                dilation_filter_x = (self.filter_x - 1) * (self.dilation_x - 1)
                dilation_filter_y = (self.filter_y - 1) * (self.dilation_y - 1)
                output_x = math.ceil(float(self.x_input - self.filter_x - dilation_filter_x + 1) / float(self.stride_x))
                output_y = math.ceil(float(self.y_input - self.filter_y - dilation_filter_y + 1) / float(self.stride_y))

            # Values substituted into the json template.
            self.json_replacements = {
                "batches": self.batches,
                "input_ch": self.input_ch,
                "output_ch": self.output_ch,
                "input_x": self.x_input,
                "input_y": self.y_input,
                "weight_x": self.filter_x,
                "weight_y": self.filter_y,
                "output_x": output_x,
                "output_y": output_y,
                "input_scale": input_scale,
                "input_zp": input_zp,
                "w_scale": scaling_factors,
                "w_zp": w_zp,
                "bias_scale": bias_scale,
                "bias_zp": bias_zp,
                "output_scale": output_scale,
                "output_zp": output_zp,
                "stride_x": self.stride_x,
                "stride_y": self.stride_y,
                "dilation_x": self.dilation_x,
                "dilation_y": self.dilation_y,
                "type_pad": self.padding,
                "ch_mult": self.channel_multiplier
            }

            # Pack weights
            # Two int4 values per byte: element 1 in the high nibble, element 0 low.
            temp = np.reshape(weights, (len(weights) // 2, 2)).astype(np.uint8)
            temp = 0xff & ((0xf0 & (temp[:, 1] << 4)) | (temp[:, 0] & 0xf))
            weights = tf.convert_to_tensor(temp)

            # Generate tflite model
            if self.test_type == 'depthwise_conv':
                generated_json = self.generate_json_from_template(
                    None, weights, int8_time_weights=True, bias_data=biases, bias_buffer=3)
            else:
                generated_json = self.generate_json_from_template(weights, int8_time_weights=False,
                                                                  bias_data=quant_bias, bias_buffer=2)
            self.flatc_generate_tflite(generated_json, self.schema_file)

            # Tensor positions in the generated flatbuffer.
            filter_index = 1
            bias_index = 2
        else:
            # int8/int16 path: build a one-layer Keras model and convert it.
            if self.test_type == 'transpose_conv':
                weight_shape = [self.filter_y, self.filter_x, out_channel, self.input_ch]
            else:
                weight_shape = [self.filter_y, self.filter_x, self.filter_ch, out_channel]

            if weights is not None:
                weights = tf.reshape(weights, weight_shape)
            else:
                weights = self.get_randomized_data(weight_shape,
                                                   self.kernel_table_file,
                                                   minrange=TestSettings.INT32_MIN,
                                                   maxrange=TestSettings.INT32_MAX,
                                                   decimals=1,
                                                   regenerate=self.regenerate_new_weights)

            # Create a one layer Keras model.
            model = keras.models.Sequential()
            input_shape = (self.batches, self.y_input, self.x_input, self.input_ch)
            model.add(keras.layers.InputLayer(input_shape=input_shape[1:], batch_size=self.batches))
            if self.test_type == 'conv':
                conv_layer = keras.layers.Conv2D(self.output_ch,
                                                 kernel_size=(self.filter_y, self.filter_x),
                                                 strides=(self.stride_y, self.stride_x),
                                                 padding=self.padding,
                                                 input_shape=input_shape[1:],
                                                 dilation_rate=(self.dilation_y, self.dilation_x),
                                                 groups=self.groups)
                model.add(conv_layer)
                conv_layer.set_weights([weights, biases])
            elif self.test_type == 'depthwise_conv':
                depthwise_layer = keras.layers.DepthwiseConv2D(kernel_size=(self.filter_y, self.filter_x),
                                                               strides=(self.stride_y, self.stride_x),
                                                               padding=self.padding,
                                                               depth_multiplier=self.channel_multiplier,
                                                               input_shape=input_shape[1:],
                                                               dilation_rate=(self.dilation_y, self.dilation_x))
                model.add(depthwise_layer)
                depthwise_layer.set_weights([weights, biases])
            elif self.test_type == 'transpose_conv':
                transposed_conv_layer = keras.layers.Conv2DTranspose(self.output_ch,
                                                                     kernel_size=(self.filter_y, self.filter_x),
                                                                     strides=(self.stride_y, self.stride_x),
                                                                     padding=self.padding,
                                                                     input_shape=input_shape[1:],
                                                                     dilation_rate=(self.dilation_y,
                                                                                    self.dilation_x),
                                                                     use_bias=self.generate_bias)
                model.add(transposed_conv_layer)
                if self.generate_bias:
                    transposed_conv_layer.set_weights([weights, biases])
                else:
                    transposed_conv_layer.set_weights([weights])

            # Tensor order in the converted model differs per operator/bias layout.
            if self.test_type == 'transpose_conv' and self.generate_bias:
                filter_index = 3
                bias_index = 2
            elif self.is_int16xint8 and self.generate_bias:
                filter_index = 1
                bias_index = 2
            else:
                filter_index = 2
                bias_index = 1

            self.convert_model(model, inttype)

        interpreter = self.interpret_model(input_data, inttype)

        all_layers_details = interpreter.get_tensor_details()
        filter_layer = all_layers_details[filter_index]

        if self.test_type == 'transpose_conv' and not self.generate_bias:
            # TODO: real null bias for all operators and not only transpose conv.
            bias_layer = None
            biases = []
        else:
            bias_layer = all_layers_details[bias_index]

        # int4 weights are packed two per byte inside the tflite model.
        if self.int4_weights:
            expected_weight_size = math.ceil(interpreter.get_tensor(filter_layer['index']).size / 2)
        else:
            expected_weight_size = interpreter.get_tensor(filter_layer['index']).size

        if weights.numpy().size != expected_weight_size or \
           (self.generate_bias and biases.numpy().size != interpreter.get_tensor(bias_layer['index']).size):
            raise RuntimeError(f"Dimension mismatch for {self.testdataset}")

        output_details = interpreter.get_output_details()
        # Output shape is NHWC: index 1 is y, index 2 is x.
        self.x_output = output_details[0]['shape'][2]
        self.y_output = output_details[0]['shape'][1]

        # Transpose conv pads from output back to input; regular conv the reverse.
        if self.test_type == 'transpose_conv':
            self.calculate_padding(self.x_input, self.y_input, self.x_output, self.y_output)
        else:
            self.calculate_padding(self.x_output, self.y_output, self.x_input, self.y_input)

        self.generate_c_array(self.input_data_file_prefix, input_data, datatype=datatype)
        self.generate_c_array(
            self.weight_data_file_prefix, interpreter.get_tensor(filter_layer['index']), pack=self.int4_weights)

        # Per-channel requantization parameters taken from the converted model.
        self.scaling_factors = filter_layer['quantization_parameters']['scales']
        per_channel_multiplier, per_channel_shift = self.generate_quantize_per_channel_multiplier()
        self.generate_c_array("output_mult", per_channel_multiplier, datatype='int32_t')
        self.generate_c_array("output_shift", per_channel_shift, datatype='int32_t')

        if self.generate_bias:
            self.generate_c_array(
                self.bias_data_file_prefix, interpreter.get_tensor(bias_layer['index']), bias_datatype)
        else:
            self.generate_c_array(
                self.bias_data_file_prefix, biases, bias_datatype)

        # Generate reference
        interpreter.invoke()
        output_data = interpreter.get_tensor(output_details[0]["index"])
        self.generate_c_array(self.output_data_file_prefix,
                              np.clip(output_data, self.out_activation_min, self.out_activation_max),
                              datatype=datatype)

        self.write_c_config_header()
        self.write_c_header_wrapper()