mfcc.py 1.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465
  1. import numpy as np
  2. def frequencyToMelSpace(freq):
  3. return 1127.0 * np.log(1.0 + freq / 700.0)
  4. def melSpaceToFrequency(mels):
  5. return 700.0 * (np.exp(mels / 1127.0) - 1.0)
  6. def melFilterMatrix(fmin, fmax, numOfMelFilters,fs,FFTSize):
  7. filters = np.zeros((numOfMelFilters,int(FFTSize/2+1)))
  8. zeros = np.zeros(int(FFTSize // 2 ))
  9. fmin_mel = frequencyToMelSpace(fmin)
  10. fmax_mel = frequencyToMelSpace(fmax)
  11. mels = np.linspace(fmin_mel, fmax_mel, num=numOfMelFilters+2)
  12. linearfreqs = np.linspace( 0, fs/2.0, int(FFTSize // 2 + 1) )
  13. spectrogrammels = frequencyToMelSpace(linearfreqs)[1:]
  14. filtPos=[]
  15. filtLen=[]
  16. totalLen = 0
  17. packedFilters = []
  18. for n in range(numOfMelFilters):
  19. upper = (spectrogrammels - mels[n])/(mels[n+1]-mels[n])
  20. lower = (mels[n+2] - spectrogrammels)/(mels[n+2]-mels[n+1])
  21. filters[n, :] = np.hstack([0,np.maximum(zeros,np.minimum(upper,lower))])
  22. nb = 0
  23. startFound = False
  24. for sample in filters[n, :]:
  25. if not startFound and sample != 0.0:
  26. startFound = True
  27. startPos = nb
  28. if startFound and sample == 0.0:
  29. endPos = nb - 1
  30. break
  31. nb = nb + 1
  32. filtLen.append(endPos - startPos+1)
  33. totalLen += endPos - startPos + 1
  34. filtPos.append(startPos)
  35. packedFilters += list(filters[n, startPos:endPos+1])
  36. return filtLen,filtPos,totalLen,packedFilters,filters
  37. def dctMatrix(numOfDctOutputs, numOfMelFilters):
  38. result = np.zeros((numOfDctOutputs,numOfMelFilters))
  39. s=(np.linspace(1,numOfMelFilters,numOfMelFilters) - 0.5)/numOfMelFilters
  40. for i in range(0, numOfDctOutputs):
  41. result[i,:]=np.cos(i * np.pi*s) * np.sqrt(2.0/numOfMelFilters)
  42. return result.reshape(numOfDctOutputs*numOfMelFilters)