mfcc.py 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
  1. import numpy as np
  2. import cmsisdsp.datatype as dt
  3. def frequencyToMelSpace(freq):
  4. """
  5. Convert a frequency in Hz to Mel space value
  6. :param freq: Frequency in Hz.
  7. :type freq: float
  8. :return: Mel value.
  9. :rtype: float
  10. """
  11. return 1127.0 * np.log(1.0 + freq / 700.0)
  12. def melSpaceToFrequency(mels):
  13. """
  14. Convert a Mel space value to a frequency in Hz
  15. :param freq: Mel value.
  16. :type freq: float
  17. :return: Frequency in Hz.
  18. :rtype: float
  19. """
  20. return 700.0 * (np.exp(mels / 1127.0) - 1.0)
  21. def melFilterMatrix(dtype,fmin, fmax, numOfMelFilters,fs,FFTSize):
  22. """
  23. Sparse matrix in a specific format and encoding the filters in Mel space
  24. :param dtype: The datatype to use for the matrix coefficients.
  25. :type dtype: int
  26. :param fmin: Minimum frequency in Hz.
  27. :type fmin: float
  28. :param fmax: Maximum frequency in Hz.
  29. :type fmax: float
  30. :param numOfMelFilters: Number of Mel filters.
  31. :type numOfMelFilters: int
  32. :param fs: Sampling frequency.
  33. :type fs: int
  34. :param FFTSize: FFT Length.
  35. :type FFTSize: int
  36. :return: A tuple encoding the sparse matrix.
  37. :rtype: A tuple
  38. """
  39. filters = np.zeros((numOfMelFilters,int(FFTSize/2+1)))
  40. zeros = np.zeros(int(FFTSize // 2 ))
  41. fmin_mel = frequencyToMelSpace(fmin)
  42. fmax_mel = frequencyToMelSpace(fmax)
  43. mels = np.linspace(fmin_mel, fmax_mel, num=numOfMelFilters+2)
  44. linearfreqs = np.linspace( 0, fs/2.0, int(FFTSize // 2 + 1) )
  45. spectrogrammels = frequencyToMelSpace(linearfreqs)[1:]
  46. filtPos=[]
  47. filtLen=[]
  48. totalLen = 0
  49. packedFilters = []
  50. for n in range(numOfMelFilters):
  51. upper = (spectrogrammels - mels[n])/(mels[n+1]-mels[n])
  52. lower = (mels[n+2] - spectrogrammels)/(mels[n+2]-mels[n+1])
  53. filters[n, :] = np.hstack([0,np.maximum(zeros,np.minimum(upper,lower))])
  54. nb = 0
  55. startFound = False
  56. for sample in filters[n, :]:
  57. if not startFound and sample != 0.0:
  58. startFound = True
  59. startPos = nb
  60. if startFound and sample == 0.0:
  61. endPos = nb - 1
  62. break
  63. nb = nb + 1
  64. filtLen.append(endPos - startPos+1)
  65. totalLen += endPos - startPos + 1
  66. filtPos.append(startPos)
  67. packedFilters += list(filters[n, startPos:endPos+1])
  68. return filtLen,filtPos,dt.convert(packedFilters,dtype)
  69. def dctMatrix(dtype,numOfDctOutputs, numOfMelFilters):
  70. """
  71. Dct matrix in a specific format
  72. :param dtype: The datatype to use for the matrix coefficients.
  73. :type dtype: int
  74. :param numOfDctOutputs: Number of DCT bands.
  75. :type numOfDctOutputs: int
  76. :param numOfMelFilters: Number of Mel filters.
  77. :type numOfMelFilters: int
  78. :return: The dct matrix.
  79. :rtype: array of dtype
  80. """
  81. result = np.zeros((numOfDctOutputs,numOfMelFilters))
  82. s=(np.linspace(1,numOfMelFilters,numOfMelFilters) - 0.5)/numOfMelFilters
  83. for i in range(0, numOfDctOutputs):
  84. result[i,:]=np.cos(i * np.pi*s) * np.sqrt(2.0/numOfMelFilters)
  85. return dt.convert(result.reshape(numOfDctOutputs*numOfMelFilters),dtype)