FastMath.py 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336
  1. import os.path
  2. import numpy as np
  3. import itertools
  4. import Tools
  5. import math
  6. import numpy as np
  7. def q31accuracy(x):
  8. return(np.round(1.0*x * (1<<31)))
  9. def q15accuracy(x):
  10. return(np.round(1.0*x * (1<<15)))
  11. def q7accuracy(x):
  12. return(np.round(1.0*x * (1<<7)))
  13. def Q31toF32(x):
  14. return(1.0*x / 2**31)
  15. def Q15toF32(x):
  16. return(1.0*x / 2**15)
  17. def Q7toF32(x):
  18. return(1.0*x / 2**7)
  19. # Those patterns are used for tests and benchmarks.
  20. # For tests, there is the need to add tests for saturation
  21. # For benchmarks
  22. NBSAMPLES=256
  23. def cartesian(*somelists):
  24. r=[]
  25. for element in itertools.product(*somelists):
  26. r.append(element)
  27. return(r)
  28. # Fixed point division should not be called with a denominator of zero.
  29. # But if it is, it should return a saturated result.
  30. def divide(f,r):
  31. e = 0
  32. a,b=r
  33. if f == Tools.Q31:
  34. e = 1.0 / (1<<31)
  35. a = 1.0*q31accuracy(a) / (2**31)
  36. b = 1.0*q31accuracy(b) / (2**31)
  37. if f == Tools.Q15:
  38. e = 1.0 / (1<<15)
  39. a = 1.0*q15accuracy(a) / (2**15)
  40. b = 1.0*q15accuracy(b) / (2**15)
  41. if f == Tools.Q7:
  42. e = 1.0 / (1<<7)
  43. a = 1.0*q7accuracy(a) / (2**7)
  44. b = 1.0*q7accuracy(b) / (2**7)
  45. if b == 0.0:
  46. if a >= 0.0:
  47. return(1.0,0)
  48. else:
  49. return(-1.0,0)
  50. k = 0
  51. while abs(a) > abs(b):
  52. a = a / 2.0
  53. k = k + 1
  54. # In C code we don't saturate but instead generate the right value
  55. # with a shift of 1.
  56. # So this test is to ease the comparison between the Python reference
  57. # and the output of the division algorithm in C
  58. if abs(a/b) > 1 - e:
  59. a = a / 2.0
  60. k = k + 1
  61. return(a/b,k)
  62. def initLogValues(format):
  63. if format == Tools.Q15:
  64. vals=np.linspace(np.float_power(2,-15),1.0,num=125)
  65. elif format == Tools.F16:
  66. vals=np.linspace(np.float_power(2,-10),1.0,num=125)
  67. else:
  68. vals=np.linspace(np.float_power(2,-31),1.0,num=125)
  69. ref=np.log(vals)
  70. if format==Tools.Q31 :
  71. # Format must be Q5.26
  72. ref = ref / 32.0
  73. if format == Tools.Q15:
  74. # Format must be Q4.11
  75. ref = ref / 16.0
  76. return(vals,ref)
  77. def normalizeToOne(x):
  78. s = 0
  79. while (abs(x)>1):
  80. x = x /2.0
  81. s = s + 1
  82. return(int(s),x)
  83. def writeTests(config,format):
  84. a1=np.array([0,math.pi/4,math.pi/2,3*math.pi/4,math.pi,5*math.pi/4,3*math.pi/2,2*math.pi-1e-6])
  85. a2=np.array([-math.pi/4,-math.pi/2,-3*math.pi/4,-math.pi,-5*math.pi/4,-3*math.pi/2,-2*math.pi-1e-6])
  86. a3 = a1 + 2*math.pi
  87. angles=np.concatenate((a1,a2,a3))
  88. refcos = np.cos(angles)
  89. refsin = np.sin(angles)
  90. vals=np.linspace(0.0,1.0,1024)
  91. sqrtvals=np.sqrt(vals)
  92. # Negative values in CMSIS are giving 0
  93. vals[0] = -0.4
  94. sqrtvals[0] = 0.0
  95. if format != Tools.F64 and format != 0 and format != 16:
  96. angles=np.concatenate((a1,a2,a1))
  97. angles = angles / (2*math.pi)
  98. config.writeInput(1, angles,"Angles")
  99. config.writeInput(1, vals,"SqrtInput")
  100. config.writeReference(1, sqrtvals,"Sqrt")
  101. config.writeReference(1, refcos,"Cos")
  102. config.writeReference(1, refsin,"Sin")
  103. # For benchmarks
  104. samples=np.random.randn(NBSAMPLES)
  105. samples = np.abs(Tools.normalize(samples))
  106. config.writeInput(1, samples,"Samples")
  107. numerator=np.linspace(-0.9,0.9)
  108. numerator=np.hstack([numerator,np.array([-1.0,1.0])])
  109. denominator=np.linspace(-0.9,0.9)
  110. denominator=np.hstack([denominator,np.array([-1.0,1.0])])
  111. samples=cartesian(numerator,denominator)
  112. numerator=[x[0] for x in samples]
  113. denominator=[x[1] for x in samples]
  114. result=[divide(format,x) for x in samples]
  115. resultValue=[x[0] for x in result]
  116. resultShift=[x[1] for x in result]
  117. config.setOverwrite(False)
  118. config.writeInput(1, numerator,"Numerator")
  119. config.writeInput(1, denominator,"Denominator")
  120. config.writeReference(1, resultValue,"DivisionValue")
  121. config.writeReferenceS16(1, resultShift,"DivisionShift")
  122. config.setOverwrite(False)
  123. vals,ref=initLogValues(format)
  124. config.writeInput(1, vals,"LogInput")
  125. config.writeReference(1, ref,"Log")
  126. config.setOverwrite(False)
  127. # Testing of ATAN2
  128. angles=np.linspace(0.0,2*math.pi,1000,endpoint=True)
  129. angles=np.hstack([angles,np.array([math.pi/4.0])])
  130. if format == Tools.Q31 or format == Tools.Q15:
  131. radius=[1.0]
  132. else:
  133. radius=np.linspace(0.1,0.9,10,endpoint=True)
  134. combinations = cartesian(radius,angles)
  135. res=[]
  136. yx = []
  137. for r,angle in combinations:
  138. x = r*np.cos(angle)
  139. y = r*np.sin(angle)
  140. res.append(np.arctan2(y,x))
  141. yx.append(y)
  142. yx.append(x)
  143. config.writeInput(1, np.array(yx).flatten(),"Atan2Input")
  144. # Q2.29 or Q2.13 to represent PI in the output
  145. if format == Tools.Q31 or format == Tools.Q15:
  146. config.writeReference(1, np.array(res)/4.0,"Atan2Ref")
  147. else:
  148. config.writeReference(1, np.array(res),"Atan2Ref")
  149. config.setOverwrite(False)
  150. if format == Tools.Q31 or format == Tools.Q15:
  151. if format == Tools.Q31:
  152. theInput=np.array([1.0-1e-6,0.6,0.5,0.3,0.25,0.1,1.0/(1<<31)])
  153. if format == Tools.Q15:
  154. theInput=np.array([1.0-1e-6,0.6,0.5,0.3,0.25,0.1,1.0/(1<<15)])
  155. ref=1.0 / theInput
  156. shiftAndScaled=np.array([normalizeToOne(x) for x in ref]).transpose()
  157. shiftValues=shiftAndScaled[0].astype(np.int16)
  158. scaledValues=shiftAndScaled[1]
  159. #print(shiftAndScaled)
  160. config.writeInput(1, np.array(theInput),"RecipInput")
  161. config.writeReference(1, scaledValues,"RecipRef")
  162. config.writeReferenceS16(1, shiftValues,"RecipShift")
  163. def tocint32(x):
  164. if x < 0:
  165. return((0x10000000000000000 + x) & 0xFFFFFFFF)
  166. else:
  167. return(x & 0xFFFFFFFF)
  168. # C and Python are not rounding the integer division
  169. # in the same way
  170. def cdiv(a,b):
  171. sign = 1
  172. if ((a<0) and (b>0)) or ((a>0) and (b<0)):
  173. sign = -1
  174. a= abs(a)
  175. b = abs(b)
  176. d = sign*(a // b)
  177. return(d)
  178. def testInt64(config):
  179. theInput=[0x1000000080000000,
  180. 0x0000000080000000,
  181. 0x0000000020000000,
  182. 0x0000000000000000]
  183. ref=[0x40000002,
  184. 0x40000000,
  185. 0x40000000,
  186. 0
  187. ]
  188. norms=[-30,-1,1,0]
  189. config.writeInputU64(1,np.array(theInput),"Norm64To32_Input")
  190. config.writeReferenceS16(1,norms,"RefNorm64To32_Norms")
  191. config.writeReferenceS32(1,ref,"RefNorm64To32_Vals")
  192. config.setOverwrite(False)
  193. allCombinations=[(0x7FFFFFFFFFFFFFFF,2),
  194. (-0x7FFFFFFFFFFFFFFF-1,2),
  195. ( 0x4000000000000000,0x7FFFFFFF),
  196. ( -0x4000000000000000,0x7FFFFFFF),
  197. ( 0x2000000000000000,0x7FFFFFFF),
  198. ( -0x2000000000000000,0x7FFFFFFF),
  199. ( 0x1000000000000000,0x7FFFFFFF),
  200. ( -0x1000000000000000,0x7FFFFFFF),
  201. ( 0x0000000080000000,2),
  202. ( -0x0000000080000000,2),
  203. ( 0x0000000040000000,2),
  204. ( -0x0000000080000000,2)
  205. ]
  206. res = [tocint32(cdiv(x,y)) for (x,y) in allCombinations]
  207. allCombinations=np.array(allCombinations,dtype=np.int64).flatten()
  208. config.writeInputS64(1,allCombinations[0::2],"DivDenInput")
  209. config.writeInputS32(1,allCombinations[1::2],"DivNumInput")
  210. config.writeReferenceU32(1, res,"DivRef")
  211. config.setOverwrite(False)
  212. def writeTestsFloat(config,format):
  213. writeTests(config,format)
  214. data1 = np.random.randn(20)
  215. data1 = np.abs(data1)
  216. data1 = data1 + 1e-3 # To avoid zero values
  217. data1 = Tools.normalize(data1)
  218. samples=np.concatenate((np.array([0.0,1.0]),np.linspace(-0.4,0.4)))
  219. config.writeInput(1, samples,"ExpInput")
  220. v = np.exp(samples)
  221. config.writeReference(1, v,"Exp")
  222. # For benchmarks and other tests
  223. samples=np.random.randn(NBSAMPLES)
  224. samples = np.abs(Tools.normalize(samples))
  225. config.writeInput(1, samples,"Samples")
  226. v = 1.0 / samples
  227. config.writeReference(1, v,"Inverse")
  228. def generatePatterns():
  229. PATTERNDIR = os.path.join("Patterns","DSP","FastMath","FastMath")
  230. PARAMDIR = os.path.join("Parameters","DSP","FastMath","FastMath")
  231. configf64=Tools.Config(PATTERNDIR,PARAMDIR,"f64")
  232. configf32=Tools.Config(PATTERNDIR,PARAMDIR,"f32")
  233. configf16=Tools.Config(PATTERNDIR,PARAMDIR,"f16")
  234. configq31=Tools.Config(PATTERNDIR,PARAMDIR,"q31")
  235. configq15=Tools.Config(PATTERNDIR,PARAMDIR,"q15")
  236. configq64=Tools.Config(PATTERNDIR,PARAMDIR,"q63")
  237. configf64.setOverwrite(False)
  238. configf32.setOverwrite(False)
  239. configf16.setOverwrite(False)
  240. configq31.setOverwrite(False)
  241. configq15.setOverwrite(False)
  242. configq64.setOverwrite(False)
  243. writeTestsFloat(configf64,Tools.F64)
  244. writeTestsFloat(configf32,0)
  245. writeTestsFloat(configf16,16)
  246. writeTests(configq31,31)
  247. writeTests(configq15,15)
  248. testInt64(configq64)
  249. if __name__ == '__main__':
  250. generatePatterns()