MFCC.py 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292
  1. ###########################################
  2. # Project: CMSIS DSP Library
  3. # Title: MFCC.py
  4. # Description: Test pattern generation for MFCC
  5. #
  6. # $Date: 02 September 2021
  7. # $Revision: V1.10.0
  8. #
  9. # Target Processor: Cortex-M and Cortex-A cores
  10. # -------------------------------------------------------------------- */
  11. #
  12. # Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  13. #
  14. # SPDX-License-Identifier: Apache-2.0
  15. #
  16. # Licensed under the Apache License, Version 2.0 (the License); you may
  17. # not use this file except in compliance with the License.
  18. # You may obtain a copy of the License at
  19. #
  20. # www.apache.org/licenses/LICENSE-2.0
  21. #
  22. # Unless required by applicable law or agreed to in writing, software
  23. # distributed under the License is distributed on an AS IS BASIS, WITHOUT
  24. # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  25. # See the License for the specific language governing permissions and
  26. # limitations under the License.
  27. ############################################
  28. import os.path
  29. import numpy as np
  30. import itertools
  31. import Tools
  32. import scipy
  33. import scipy.signal as sig
  34. import scipy.fftpack
  35. ################################
  36. #
  37. # Gives the same results as the tensorflow lite
  38. # MFCC if hamming window is used
  39. # (TF stft) is using hanning by default
  40. #
  41. DEBUG = False
  42. def frequencyToMelSpace(freq):
  43. return 1127.0 * np.log(1.0 + freq / 700.0)
  44. def melSpaceToFrequency(mels):
  45. return 700.0 * (np.exp(mels / 1127.0) - 1.0)
  46. def melFilterMatrix(fmin, fmax, numOfMelFilters,fs,FFTSize):
  47. filters = np.zeros((numOfMelFilters,int(FFTSize/2+1)))
  48. zeros = np.zeros(int(FFTSize // 2 ))
  49. fmin_mel = frequencyToMelSpace(fmin)
  50. fmax_mel = frequencyToMelSpace(fmax)
  51. mels = np.linspace(fmin_mel, fmax_mel, num=numOfMelFilters+2)
  52. linearfreqs = np.linspace( 0, fs/2.0, int(FFTSize // 2 + 1) )
  53. spectrogrammels = frequencyToMelSpace(linearfreqs)[1:]
  54. for n in range(numOfMelFilters):
  55. upper = (spectrogrammels - mels[n])/(mels[n+1]-mels[n])
  56. lower = (mels[n+2] - spectrogrammels)/(mels[n+2]-mels[n+1])
  57. filters[n, :] = np.hstack([0,np.maximum(zeros,np.minimum(upper,lower))])
  58. return filters
  59. def dctMatrix(numOfDctOutputs, numOfMelFilters):
  60. result = np.zeros((numOfDctOutputs,numOfMelFilters))
  61. s=(np.linspace(1,numOfMelFilters,numOfMelFilters) - 0.5)/numOfMelFilters
  62. for i in range(0, numOfDctOutputs):
  63. result[i,:]=np.cos(i * np.pi*s) * np.sqrt(2.0/numOfMelFilters)
  64. return result
  65. class MFCCConfig:
  66. def __init__(self,freq_min,freq_high,numOfMelFilters,numOfDctOutputs,FFTSize,sample_rate):
  67. self._freq_min=freq_min
  68. self._freq_high=freq_high
  69. self._numOfMelFilters = numOfMelFilters
  70. self._FFTSize=FFTSize
  71. self._sample_rate=sample_rate
  72. #self._window = sig.hann(FFTSize, sym=True)
  73. self._window = sig.hamming(FFTSize, sym=False)
  74. #print(self._window)
  75. self._numOfDctOutputs=numOfDctOutputs
  76. self._filters = melFilterMatrix(freq_min, freq_high, numOfMelFilters,sample_rate,FFTSize)
  77. self._dctMatrixFilters = dctMatrix(numOfDctOutputs, numOfMelFilters)
  78. def mfcc(self,audio):
  79. m = np.amax(np.abs(audio))
  80. if m != 0:
  81. s = 1.0 / m
  82. else:
  83. s = 1.0
  84. audio = audio * s
  85. audioWin = audio * self._window
  86. if DEBUG:
  87. print(audioWin)
  88. audioFFT = scipy.fftpack.fft(audioWin)
  89. if DEBUG:
  90. print(audioFFT)
  91. audioPower = np.abs(audioFFT)
  92. if DEBUG:
  93. print(audioPower)
  94. filterLimit = int(1 + self._FFTSize // 2)
  95. audioPower=audioPower[:filterLimit]
  96. audioFiltered = np.dot(self._filters,audioPower)
  97. if DEBUG:
  98. print(audioFiltered)
  99. audioLog = np.log(audioFiltered + 1e-6)
  100. cepstral_coefficents = np.dot(self._dctMatrixFilters, audioLog)
  101. return(cepstral_coefficents)
  102. debug=np.array([ 0.65507051 ,-0.94647589 ,0.00627239 ,0.14151286 ,-0.10863318 ,-0.36370327
  103. ,0.05777126 ,-0.11915792 ,0.50183546 ,-0.31461335 ,0.66440771 ,0.05389963
  104. ,0.39690544 ,0.25424852 ,-0.17045277 ,0.09649268 ,0.87357385 ,-0.44666372
  105. ,-0.02637822 ,-0.10055151 ,-0.14610252 ,-0.05981251 ,-0.02999124 ,0.60923213
  106. ,0.10530095 ,0.35684248 ,0.21845946 ,0.47845017 ,-0.60206979 ,0.25186908
  107. ,-0.27410056 ,-0.07080467 ,-0.05109539 ,-0.2666572 ,0.25483105 ,-0.86459185
  108. ,0.07733397 ,-0.58535444 ,0.06230904 ,-0.04161475 ,-0.17467296 ,0.77721125
  109. ,-0.01728161 ,-0.32141218 ,0.36674466 ,-0.17932843 ,0.78486115 ,0.12469579
  110. ,-0.94796877 ,0.05536031 ,0.32627676 ,0.46628512 ,-0.02585836 ,-0.51439834
  111. ,0.21387904 ,0.16319442 ,-0.01020818 ,-0.77161183 ,0.07754634 ,-0.24970455
  112. ,0.2368003 ,0.35167963 ,0.14620137 ,-0.02415204 ,0.91086167 ,-0.02434647
  113. ,-0.3968239 ,-0.04703925 ,-0.43905103 ,-0.34834965 ,0.33728158 ,0.15138992
  114. ,-0.43218885 ,0.26619718 ,0.07177906 ,0.33393581 ,-0.50306915 ,-0.63101084
  115. ,-0.08128395 ,-0.06569788 ,0.84232797 ,-0.32436751 ,0.02528537 ,-0.3498329
  116. ,0.41859931 ,0.07794887 ,0.4571989 ,0.24290963 ,0.08437417 ,-0.01371585
  117. ,-0.00103008 ,0.83978697 ,-0.29001237 ,0.14438743 ,0.11943318 ,-0.25576402
  118. ,0.25151083 ,0.07886626 ,0.11565781 ,-0.01582203 ,0.1310246 ,-0.5553611
  119. ,-0.37950665 ,0.44179691 ,0.08460877 ,0.30646419 ,0.48927934 ,-0.21240309
  120. ,0.36844264 ,0.49686615 ,-0.81617664 ,0.52221472 ,-0.05188992 ,-0.03929655
  121. ,-0.47674501 ,-0.54506781 ,0.30711148 ,0.10049337 ,-0.47549213 ,0.59106713
  122. ,-0.62276051 ,-0.35182917 ,0.14612027 ,0.56142168 ,-0.01053732 ,0.35782179
  123. ,-0.27220781 ,-0.03672346 ,-0.11282222 ,0.3364912 ,-0.22352515 ,-0.04245287
  124. ,0.56968605 ,-0.14023724 ,-0.82982905 ,0.00860008 ,0.37920345 ,-0.53749318
  125. ,-0.12761215 ,0.08567603 ,0.47020765 ,-0.28794812 ,-0.33888971 ,0.01850441
  126. ,0.66848233 ,-0.26532759 ,-0.20777571 ,-0.68342729 ,-0.41498696 ,0.00593224
  127. ,0.02229368 ,0.75596329 ,0.29447568 ,-0.1106449 ,0.24181939 ,0.05807497
  128. ,-0.14343857 ,0.304988 ,0.00689148 ,-0.06264758 ,0.25864714 ,-0.22252155
  129. ,0.28621689 ,0.17031599 ,-0.34694027 ,-0.01625718 ,0.39834181 ,0.01259659
  130. ,-0.28022716 ,-0.02506168 ,-0.10276881 ,0.31733924 ,0.02787068 ,-0.09824124
  131. ,0.45147797 ,0.14451518 ,0.17996395 ,-0.70594978 ,-0.92943177 ,0.13649282
  132. ,-0.5938426 ,0.50289928 ,0.19635269 ,0.16811504 ,0.05803999 ,0.0037204
  133. ,0.13847419 ,0.30568038 ,0.3700732 ,0.21257548 ,-0.31151753 ,-0.28836886
  134. ,0.68743932 ,-0.11084429 ,-0.4673766 ,0.16637754 ,-0.38992572 ,0.16505578
  135. ,-0.07499844 ,0.04226538 ,-0.11042177 ,0.0704542 ,-0.632819 ,-0.54898472
  136. ,0.26498649 ,-0.59380386 ,0.93387213 ,0.06526726 ,-0.23223558 ,0.07941394
  137. ,0.14325166 ,0.26914661 ,0.00925575 ,-0.34282161 ,-0.51418231 ,-0.12011075
  138. ,-0.26676314 ,-0.09999028 ,0.03027513 ,0.22846503 ,-0.08930338 ,-0.1867156
  139. ,0.66297846 ,0.32220769 ,-0.06015469 ,0.04034043 ,0.09595597 ,-1.
  140. ,-0.42933352 ,0.25069376 ,-0.26030918 ,-0.28511861 ,-0.19931228 ,0.24408572
  141. ,-0.3231952 ,0.45688981 ,-0.07354078 ,0.25669449 ,-0.44202722 ,0.11928406
  142. ,-0.32826109 ,0.52660984 ,0.03067858 ,0.11095242 ,0.19933679 ,0.03042371
  143. ,-0.34768682 ,0.09108447 ,0.61234556 ,0.1854931 ,0.19680502 ,0.27617564
  144. ,0.33381827 ,-0.47358967 ,0.28714328 ,-0.27495982])
  145. def noiseSignal(nb):
  146. return(2.0*np.random.rand(nb)-1.0)
  147. def sineSignal(freqRatio,nb):
  148. fc = nb / 2.0
  149. f = freqRatio*fc
  150. time = np.arange(0,nb)
  151. return(np.sin(2 * np.pi * f * time/nb))
  152. def noisySineSignal(noiseAmp,r,nb):
  153. return(noiseAmp*noiseSignal(nb) + r*sineSignal(r,nb))
  154. def writeTests(config,format):
  155. NBSAMPLES=[256,512,1024]
  156. if DEBUG:
  157. NBSAMPLES=[256]
  158. sample_rate = 16000
  159. FFTSize = 256
  160. numOfDctOutputs = 13
  161. freq_min = 64
  162. freq_high = sample_rate / 2
  163. numOfMelFilters = 20
  164. for nb in NBSAMPLES:
  165. inputsNoise=[]
  166. inputsSine=[]
  167. outputsNoise=[]
  168. outputsSine=[]
  169. inNoiselengths=[]
  170. outNoiselengths=[]
  171. inSinelengths=[]
  172. outSinelengths=[]
  173. FFTSize=nb
  174. mfccConfig=MFCCConfig(freq_min,freq_high,numOfMelFilters,numOfDctOutputs,FFTSize,sample_rate)
  175. # Add noise
  176. audio=np.random.randn(nb)
  177. audio = Tools.normalize(audio)
  178. if DEBUG:
  179. audio=debug
  180. inputsNoise += list(audio)
  181. refNoise=mfccConfig.mfcc(audio)
  182. if format == Tools.Q15:
  183. refNoise = refNoise / (1<<8)
  184. if format == Tools.Q31:
  185. refNoise = refNoise / (1<<8)
  186. #print(audio)
  187. if DEBUG:
  188. print(refNoise)
  189. outputsNoise+=list(refNoise)
  190. inNoiselengths+=[nb]
  191. outNoiselengths+=[numOfDctOutputs]
  192. config.writeInput(1, inputsNoise,"MFCCNoiseInput_%d_" % nb)
  193. config.writeReference(1, outputsNoise,"MFCCNoiseRef_%d_" % nb)
  194. # Sine
  195. audio=noisySineSignal(0.1,0.8,nb)
  196. #audio = Tools.normalize(audio)
  197. inputsSine += list(audio)
  198. refSine=mfccConfig.mfcc(audio)
  199. if format == Tools.Q15:
  200. refSine = refSine / (1<<8)
  201. if format == Tools.Q31:
  202. refSine = refSine / (1<<8)
  203. #print(audio)
  204. outputsSine+=list(refSine)
  205. inSinelengths+=[nb]
  206. outSinelengths+=[numOfDctOutputs]
  207. config.writeInput(1, inputsSine,"MFCCSineInput_%d_" % nb)
  208. config.writeReference(1, outputsSine,"MFCCSineRef_%d_" % nb)
  209. def generatePatterns():
  210. PATTERNDIR = os.path.join("Patterns","DSP","Transform","MFCC")
  211. PARAMDIR = os.path.join("Parameters","DSP","Transform","MFCC")
  212. configf32=Tools.Config(PATTERNDIR,PARAMDIR,"f32")
  213. configf16=Tools.Config(PATTERNDIR,PARAMDIR,"f16")
  214. configq31=Tools.Config(PATTERNDIR,PARAMDIR,"q31")
  215. configq15=Tools.Config(PATTERNDIR,PARAMDIR,"q15")
  216. #configq7=Tools.Config(PATTERNDIR,PARAMDIR,"q7")
  217. configf32.setOverwrite(False)
  218. configf16.setOverwrite(False)
  219. configq31.setOverwrite(False)
  220. configq15.setOverwrite(False)
  221. writeTests(configf32,0)
  222. writeTests(configf16,Tools.F16)
  223. writeTests(configq31,Tools.Q31)
  224. writeTests(configq15,Tools.Q15)
  225. if __name__ == '__main__':
  226. generatePatterns()