#!/usr/bin/env python3
#
# Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os
import sys
import math
import argparse
import numpy as np
import warnings as w
from packaging import version
from abc import ABC, abstractmethod

w.filterwarnings('ignore', category=FutureWarning)

try:
    import tensorflow as tf
except Exception as e:
    print(e)
    sys.exit(1)

REQUIRED_MINIMUM_TENSORFLOW_VERSION = version.parse("2.0.0b0")
DEFAULT_TESTDATA_SET = 'basic'

LICENSE = """
/*
 * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
"""
def parse_args():
    parser = argparse.ArgumentParser(description="Generate input and reference output data for unit tests. "
                                                 "It can regenerate all or load all data/input or only parts of it, "
                                                 "which may be useful when debugging.")
    parser.add_argument('--dataset', type=str, default=DEFAULT_TESTDATA_SET, help="Name of generated test set")
    parser.add_argument('--regenerate-weights', action='store_true', help="Regenerate and store new weights")
    parser.add_argument('--regenerate-input', action='store_true', help="Regenerate and store new input")
    parser.add_argument('--regenerate-biases', action='store_true', help="Regenerate and store new biases")
    parser.add_argument('-a', '--regenerate-all', action='store_true', help="Regenerate and store all data")
    parser.add_argument('-t', '--type', type=str, default='conv', choices=['conv', 'pooling'], help='Type of test.')
    args = parser.parse_args()
    return args


class TestSettings(ABC):

    OUTDIR = 'TestCases/TestData/'
    PREGEN = 'PregeneratedData/'
    INT_MAX = 32767
    INT_MIN = -32767

    def __init__(self, args, in_ch, out_ch, x_in, y_in, w_x, w_y, stride_x, stride_y, pad, randmin, randmax,
                 outminrange=-128, outmaxrange=127, batches=1):
        self.minrange = -128
        self.maxrange = 127

        # Randomization interval
        self.mins = randmin
        self.maxs = randmax

        self.input_ch = in_ch
        self.output_ch = out_ch
        self.x_input = x_in
        self.y_input = y_in
        self.filter_x = w_x
        self.filter_y = w_y
        self.stride_x = stride_x
        self.stride_y = stride_y
        self.batches = batches
        self.has_padding = pad

        self.scaling_factors = []

        # Derive quantization parameters from a slightly widened randomization interval
        minrange = randmin - 1
        maxrange = randmax + 1
        (self.input_scale, self.input_zero_point) = self.derive_scale_and_zeropoint_from_min_max(minrange, maxrange)
        (self.output_scale, self.output_zero_point) = self.derive_scale_and_zeropoint_from_min_max(outminrange,
                                                                                                   outmaxrange)

        self.generated_header_files = []
        self.pregenerated_data_dir = self.PREGEN
        self.testdataset = args.dataset

        self.kernel_table_file = self.pregenerated_data_dir + self.testdataset + '/' + 'kernel.txt'
        self.inputs_table_file = self.pregenerated_data_dir + self.testdataset + '/' + 'input.txt'
        self.bias_table_file = self.pregenerated_data_dir + self.testdataset + '/' + 'bias.txt'
        self.parameters_file = self.pregenerated_data_dir + self.testdataset + '/' + 'params.txt'

        self.set_output_dims_and_padding()

        self.regenerate_new_weights = args.regenerate_weights
        self.regenerate_new_input = args.regenerate_input
        self.regenerate_new_bias = args.regenerate_biases

        if not os.path.exists(self.parameters_file) or args.regenerate_all:
            self.regenerate_new_bias = True
            self.regenerate_new_weights = True
            self.regenerate_new_input = True
            self.save_parameters()
        else:
            self.load_parameters()

        self.headers_dir = self.OUTDIR + self.testdataset + '/'

    def clamp_int8(self, result):
        int8_min = self.minrange
        int8_max = self.maxrange
        if result < int8_min:
            result = int8_min
        elif result > int8_max:
            result = int8_max
        return result
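
    # The affine quantization used here maps real values to int8 as
    # real ~= scale * (quantized - zero_point). Worked example: with
    # randmin=-7/randmax=7 the constructor widens the interval to [-8, 8],
    # giving scale = (8 - (-8)) / (127 - (-128)) = 16/255 ~= 0.0627 and
    # zero_point = -128 + round(8 / scale) = 0.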
    def derive_scale_and_zeropoint_from_min_max(self, minrange, maxrange):
        scale = (maxrange - minrange) / ((self.maxrange * 1.0) - self.minrange)
        zeropoint = self.minrange + int(-minrange / scale + 0.5)
        # Keep the zero point inside the representable int8 range
        zeropoint = max(self.minrange, min(zeropoint, self.maxrange))
        return (scale, zeropoint)
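
    # Tensors are cached as plain text: a comment line holding the original
    # shape (e.g. "# 1,15,15,4" for the conv input configured in __main__),
    # followed by the data flattened to 2-D. The loader below reads the shape
    # back from that first line.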
    def save_multiple_dim_array_in_txt(self, file, data):
        header = ','.join(map(str, data.shape))
        np.savetxt(file, data.reshape(-1, data.shape[-1]), header=header,
                   delimiter=',')

    def load_multiple_dim_array_from_txt(self, file):
        with open(file) as f:
            shape = list(map(int, next(f)[1:].split(',')))
            data = np.genfromtxt(f, delimiter=',').reshape(shape)
        return data.astype(np.float32)

    def save_parameters(self):
        regendir = os.path.dirname(self.parameters_file)
        if not os.path.exists(regendir):
            os.makedirs(regendir)
        params = np.array([self.input_ch, self.output_ch, self.x_input, self.y_input, self.filter_x, self.filter_y,
                           self.stride_x, self.stride_y, self.pad_x, self.pad_y, self.batches, self.has_padding])
        np.savetxt(self.parameters_file, params, fmt='%i')

    def load_parameters(self):
        params = np.loadtxt(self.parameters_file).astype(int)
        (self.input_ch, self.output_ch, self.x_input, self.y_input, self.filter_x, self.filter_y,
         self.stride_x, self.stride_y, self.pad_x, self.pad_y, self.batches, self.has_padding) = params

    def convert_tensor_np(self, tensor_in, converter):
        w = tensor_in.numpy()
        shape = w.shape
        w = w.ravel()
        fw = converter(w)
        fw.shape = shape
        return tf.convert_to_tensor(fw)

    def convert_tensor(self, tensor_in, converter):
        w = tensor_in.numpy()
        shape = w.shape
        w = w.ravel()
        float_normal = [converter(i) for i in w]
        np_float_array = np.asarray(float_normal)
        np_float_array.shape = shape
        return tf.convert_to_tensor(np_float_array)
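
    # Randomized tensors are cached under PregeneratedData/<dataset>/ so that
    # reruns reproduce the same reference data; the --regenerate-* flags (or
    # -a) force new random content for the corresponding tensor.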
    def get_randomized_data(self, dims, npfile, regenerate, decimals=0):
        if not os.path.exists(npfile) or regenerate:
            regendir = os.path.dirname(npfile)
            if not os.path.exists(regendir):
                os.makedirs(regendir)
            if decimals == 0:
                data = tf.Variable(tf.random.uniform(dims, minval=self.mins, maxval=self.maxs, dtype=tf.dtypes.int32))
                data = tf.cast(data, dtype=tf.float32)
            else:
                data = tf.Variable(tf.random.uniform(dims, minval=self.mins, maxval=self.maxs,
                                                     dtype=tf.dtypes.float32))
                data = np.around(data.numpy(), decimals)
                data = tf.convert_to_tensor(data)
            print("Saving data to {}".format(npfile))
            self.save_multiple_dim_array_in_txt(npfile, data.numpy())
        else:
            print("Loading data from {}".format(npfile))
            data = tf.convert_to_tensor(self.load_multiple_dim_array_from_txt(npfile))
        return data

    def write_c_header_wrapper(self):
        filename = "test_data.h"
        filepath = self.headers_dir + filename

        print("Generating C header wrapper {}...".format(filepath))
        with open(filepath, 'w+') as f:
            f.write("{}\n\n".format(LICENSE))
            f.write("// Generated by {}\n".format(os.path.basename(__file__)))
            while len(self.generated_header_files) > 0:
                f.write('#include "{}"\n'.format(self.generated_header_files.pop()))

    def write_c_config_header(self):
        filename = "config_data.h"
        self.generated_header_files.append(filename)
        filepath = self.headers_dir + filename

        prefix = self.testdataset.upper()

        print("Writing C header with config data {}...".format(filepath))
        with open(filepath, "w+") as f:
            f.write("{}\n".format(LICENSE))
            f.write("#pragma once\n")
            f.write("// Generated by {}\n".format(os.path.basename(__file__)))
            f.write("#define {}_OUT_CH {}\n".format(prefix, self.output_ch))
            f.write("#define {}_IN_CH {}\n".format(prefix, self.input_ch))
            f.write("#define {}_CONV_W {}\n".format(prefix, self.x_input))
            f.write("#define {}_CONV_H {}\n".format(prefix, self.y_input))
            f.write("#define {}_FILTER_X {}\n".format(prefix, self.filter_x))
            f.write("#define {}_FILTER_Y {}\n".format(prefix, self.filter_y))
            f.write("#define {}_STRIDE_X {}\n".format(prefix, self.stride_x))
            f.write("#define {}_STRIDE_Y {}\n".format(prefix, self.stride_y))
            f.write("#define {}_PAD_X {}\n".format(prefix, self.pad_x))
            f.write("#define {}_PAD_Y {}\n".format(prefix, self.pad_y))
            f.write("#define {}_OUT_CONV_W {}\n".format(prefix, self.x_output))
            f.write("#define {}_OUT_CONV_H {}\n".format(prefix, self.y_output))
            f.write("#define {}_DST_SIZE {}\n".format(prefix, self.x_output * self.y_output * self.output_ch))
            f.write("#define {}_INPUT_SIZE {}\n".format(prefix, self.x_input * self.y_input * self.input_ch))
            f.write("#define {}_INPUT_OFFSET {}\n".format(prefix, -self.input_zero_point))
            f.write("#define {}_OUTPUT_OFFSET {}\n".format(prefix, self.output_zero_point))
            f.write("#define {}_OUT_ACTIVATION_MIN {}\n".format(prefix, self.minrange))
            f.write("#define {}_OUT_ACTIVATION_MAX {}\n".format(prefix, self.maxrange))
            f.write("#define {}_INPUT_BATCHES {}\n".format(prefix, self.batches))
    def generate_c_array(self, name, array, datatype="q7_t", const="const "):
        if not os.path.exists(self.headers_dir):
            os.makedirs(self.headers_dir)

        if type(array) is list:
            w = array
            size = len(array)
        else:
            w = array.numpy().ravel()
            size = tf.size(array).numpy()

        filename = name + "_data.h"
        filepath = self.headers_dir + filename
        self.generated_header_files.append(filename)

        print("Generating C header {}...".format(filepath))
        with open(filepath, "w+") as f:
            f.write("{}\n".format(LICENSE))
            f.write("#pragma once\n")
            f.write("// Generated by {}\n".format(os.path.basename(__file__)))
            f.write("#include <stdint.h>\n\n")
            f.write(const + datatype + " " + self.testdataset + '_' + name + "[%d] =\n{\n" % size)
            for i in range(size - 1):
                f.write(" %d,\n" % w[i])
            f.write(" %d\n" % w[size - 1])
            f.write("};\n")
    def set_output_dims_and_padding(self):
        if self.has_padding:
            self.x_output = math.ceil(float(self.x_input) / float(self.stride_x))
            self.y_output = math.ceil(float(self.y_input) / float(self.stride_y))
            self.padding = 'SAME'
            pad_along_width = max((self.x_output - 1) * self.stride_x + self.filter_x - self.x_input, 0)
            pad_along_height = max((self.y_output - 1) * self.stride_y + self.filter_y - self.y_input, 0)
            pad_top = pad_along_height // 2
            pad_left = pad_along_width // 2
            self.pad_x = pad_left
            self.pad_y = pad_top
        else:
            self.x_output = math.ceil(float(self.x_input - self.filter_x + 1) / float(self.stride_x))
            self.y_output = math.ceil(float(self.y_input - self.filter_y + 1) / float(self.stride_y))
            self.padding = 'VALID'
            self.pad_x = 0
            self.pad_y = 0

    @abstractmethod
    def generate_data(self, input_data=None, weights=None, biases=None):
        ''' Must be overridden '''


class ConvSettings(TestSettings):

    def __init__(self, args, in_ch=1, out_ch=1, x_in=7, y_in=7, w_x=3, w_y=3, stride_x=2, stride_y=2,
                 pad=True, randmin=-7, randmax=7, outminrange=-128, outmaxrange=127, batches=1):
        super().__init__(args, in_ch, out_ch, x_in, y_in, w_x, w_y, stride_x, stride_y, pad, randmin, randmax,
                         outminrange, outmaxrange, batches)
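
    # Biases are quantized to int32 with zero point 0 and a scale equal to
    # input_scale * per-channel weight scale, following the TFLite convention
    # for quantized convolutions.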
    def quantize_bias(self, nparray):
        num_channels = self.output_ch
        quantized_values = []
        values = np.array(nparray)

        def quantize_float_to_int(value, scale):
            quantized = round(value / scale)
            if quantized > self.INT_MAX:
                quantized = self.INT_MAX
            elif quantized < self.INT_MIN:
                quantized = self.INT_MIN
            return quantized

        for x in range(num_channels):
            quantized_values.append(quantize_float_to_int(values[x], self.scaling_factors[x] * self.input_scale))
        return np.asarray(quantized_values)
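
    # The flat weight buffer is kept in TFLite's OHWI kernel order. To compute
    # the float reference with tf.nn.conv2d the buffer is reinterpreted as
    # [out_ch, H, W, in_ch] and transposed to TensorFlow's HWIO layout:
    # (O,H,W,I) -> (H,W,O,I) -> (H,W,I,O).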
    def reshape_kernel(self, kernel):
        kernel = tf.reshape(kernel, [self.output_ch, self.filter_y, self.filter_x, self.input_ch])
        kernel = tf.transpose(kernel, (1, 2, 0, 3))
        kernel = tf.transpose(kernel, (0, 1, 3, 2))
        return kernel

    def quantize_input(self, value):
        result = round(value / self.input_scale) + self.input_zero_point
        return self.clamp_int8(result)

    def quantize_output(self, value):
        result = round(value / self.output_scale) + self.output_zero_point
        return self.clamp_int8(result)
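
    # Weights use symmetric per-channel quantization: for each output channel
    # scale = max(|min|, |max|) / 127 and zero point 0, with quantized values
    # clamped to [-127, 127] as int8 kernels require.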
    def quantize_filter(self, nparray):
        quantized_values = []
        channel_count = self.output_ch
        input_size = self.filter_y * self.filter_x * self.input_ch * self.output_ch
        per_channel_size = input_size // channel_count
        values = np.array(nparray)
        stride = 1
        channel_stride = per_channel_size

        for channel in range(channel_count):
            fmin = 0
            fmax = 0
            for i in range(per_channel_size):
                idx = channel * channel_stride + i * stride
                fmin = min(fmin, values[idx])
                fmax = max(fmax, values[idx])
            self.scaling_factors.append(max(abs(fmin), abs(fmax)) / self.maxrange)

            for x in range(per_channel_size):
                chs = channel * channel_stride + x * stride
                quantized_value = round(round(values[chs]) / self.scaling_factors[channel])
                # Clamp to [-127, 127]
                quantized_value = min(127, max(-127, quantized_value))
                quantized_values.append(quantized_value)

        return np.asarray(quantized_values)
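
    # Each channel's effective scale (input_scale * weight_scale / output_scale)
    # is decomposed with math.frexp into a Q31 multiplier and a power-of-two
    # shift, i.e. scale = significand * 2^shift with 0.5 <= significand < 1.
    # Example: frexp(0.75) = (0.75, 0), giving multiplier round(0.75 * 2^31).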
    def generate_quantize_per_channel_multiplier(self):
        num_channels = self.output_ch
        per_channel_multiplier = []
        per_channel_shift = []

        if len(self.scaling_factors) != num_channels:
            raise RuntimeError("Missing scaling factors")

        def quantize_scale(scale):
            significand, shift = math.frexp(scale)
            significand_q31 = round(significand * (1 << 31))
            return significand_q31, shift

        for i in range(num_channels):
            effective_output_scale = self.input_scale * self.scaling_factors[i] / self.output_scale
            (quantized_multiplier, shift) = quantize_scale(effective_output_scale)
            per_channel_multiplier.append(quantized_multiplier)
            per_channel_shift.append(shift)

        self.generate_c_array("output_mult", per_channel_multiplier, datatype='int32_t')
        self.generate_c_array("output_shift", per_channel_shift, datatype='int32_t')
    def convolution(self, indata, weights, bias=None):
        indata = tf.cast(indata, dtype=tf.dtypes.float32)
        weights = tf.cast(weights, dtype=tf.dtypes.float32)
        bias = tf.cast(bias, dtype=tf.dtypes.float32)
        # NHWC strides are [batch, height, width, channels], so the y (height)
        # stride comes before the x (width) stride
        out = tf.nn.conv2d(indata, weights, strides=[1, self.stride_y, self.stride_x, 1], padding=self.padding)
        if tf.TensorShape([self.batches, self.y_output, self.x_output, self.output_ch]).as_list() != \
                out.shape.as_list():
            raise RuntimeError("Shape mismatch, need to regenerate data?")
        out = tf.nn.bias_add(out, bias)
        out = tf.clip_by_value(out, self.minrange, self.maxrange)
        return out

    def generate_data(self, input_data=None, weights=None, biases=None):
        # TensorFlow Lite uses a different kernel format than TensorFlow
        reshaped_weights = None

        # Generate or load saved data unless hardcoded data is provided
        if input_data is not None:
            input_data = tf.reshape(input_data, [self.batches, self.y_input, self.x_input, self.input_ch])
        else:
            input_data = self.get_randomized_data([self.batches, self.y_input, self.x_input, self.input_ch],
                                                  self.inputs_table_file,
                                                  regenerate=self.regenerate_new_input)
        if weights is not None:
            weights = tf.reshape(weights, [self.filter_y, self.filter_x, self.input_ch, self.output_ch])
        else:
            weights = self.get_randomized_data([self.filter_y, self.filter_x, self.input_ch, self.output_ch],
                                               self.kernel_table_file,
                                               regenerate=self.regenerate_new_weights)
        reshaped_weights = self.reshape_kernel(weights)

        if biases is not None:
            biases = tf.reshape(biases, [self.output_ch])
        else:
            biases = self.get_randomized_data([self.output_ch],
                                              self.bias_table_file,
                                              regenerate=self.regenerate_new_bias)

        # Generate conv reference
        conv = self.convolution(input_data, reshaped_weights, biases)

        # Quantize and write to C headers
        self.generate_c_array("input", self.convert_tensor(input_data, self.quantize_input))
        self.generate_c_array("weights", self.convert_tensor_np(weights, self.quantize_filter))
        self.generate_c_array("biases", self.convert_tensor_np(biases, self.quantize_bias), "int32_t")
        self.generate_quantize_per_channel_multiplier()
        self.generate_c_array("output_ref", self.convert_tensor(conv, self.quantize_output))

        self.write_c_config_header()
        self.write_c_header_wrapper()


class PoolingSettings(TestSettings):

    def __init__(self, args, randmin=-7, randmax=7, in_ch=1, out_ch=1, x_in=7, y_in=7, w_x=3, w_y=3, stride_x=2,
                 stride_y=2, batches=1, pad=True):
        # Forward parameters in the order expected by TestSettings.__init__
        super().__init__(args, in_ch, out_ch, x_in, y_in, w_x, w_y, stride_x, stride_y, pad,
                         randmin, randmax, batches=batches)

    def generate_data(self, input_data=None, weights=None, biases=None):
        # TODO
        pass


if __name__ == '__main__':
    if version.parse(tf.__version__) < REQUIRED_MINIMUM_TENSORFLOW_VERSION:
        print("Unsupported TensorFlow version: {}".format(version.parse(tf.__version__)))
        sys.exit(0)

    args = parse_args()

    if args.type == 'conv':
        generator = ConvSettings(args, in_ch=4, out_ch=17, x_in=15, y_in=15, w_x=1, w_y=1, stride_x=1, stride_y=1,
                                 pad=False, randmin=1, randmax=4, outminrange=-126, outmaxrange=127)
    elif args.type == 'pooling':
        generator = PoolingSettings(args, x_in=6, y_in=5, stride_x=1, stride_y=2, w_x=2, w_y=2)

    generator.generate_data()