AppNodes.h 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352
  1. /* ----------------------------------------------------------------------
  2. * Project: CMSIS DSP Library
  3. * Title: AppNodes.h
  4. * Description: Application nodes for the C compute graph
  5. *
  6. * $Date: 16 March 2022
  7. *
  8. * Target Processor: Cortex-M and Cortex-A cores
  9. * -------------------------------------------------------------------- */
  10. /*
  11. * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
  12. *
  13. * SPDX-License-Identifier: Apache-2.0
  14. *
  15. * Licensed under the Apache License, Version 2.0 (the License); you may
  16. * not use this file except in compliance with the License.
  17. * You may obtain a copy of the License at
  18. *
  19. * www.apache.org/licenses/LICENSE-2.0
  20. *
  21. * Unless required by applicable law or agreed to in writing, software
  22. * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  23. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  24. * See the License for the specific language governing permissions and
  25. * limitations under the License.
  26. */
  27. #ifndef _APPNODES_H_
  28. #define _APPNODES_H_
  29. #include <hal/nrf_pdm.h>
  30. #include "coef.h"
  31. #include <Arduino.h>
  32. #include <HardwareSerial.h>
  33. // When enabled, lots of additional trace is generated
  34. //#define DEBUG
  35. // Buffer to read samples into, each sample is 16-bits
  36. // This is written by the PDM driver
  37. extern short sampleBuffer[AUDIOBUFFER_LENGTH];
  38. // Number of audio samples available in the audio buffer
  39. extern volatile int samplesRead;
  40. // Sink node. It is just printing the recognized word
  41. template<typename IN, int inputSize> class Sink;
  42. template<int inputSize>
  43. class Sink<q15_t, inputSize>: public GenericSink<q15_t, inputSize>
  44. {
  45. public:
  46. Sink(FIFOBase<q15_t> &src):GenericSink<q15_t,inputSize>(src){};
  47. int run()
  48. {
  49. #if defined(DEBUG)
  50. Serial.println("==== Sink");
  51. #endif
  52. q15_t *b=this->getReadBuffer();
  53. if (b[0]==-1)
  54. {
  55. Serial.println("Unknown");
  56. };
  57. if (b[0]==0)
  58. {
  59. Serial.println("Yes");
  60. };
  61. return(0);
  62. };
  63. };
  64. // Source node. It is getting audio data from the PDM driver
  65. template<typename OUT, int outputSize> class Source;
  66. template<int outputSize>
  67. class Source<q15_t,outputSize>: public GenericSource<q15_t,outputSize>
  68. {
  69. public:
  70. Source(FIFOBase<q15_t> &dst):GenericSource<q15_t,outputSize>(dst)
  71. {
  72. };
  73. int run(){
  74. #if defined(DEBUG)
  75. Serial.println("==== Source");
  76. #endif
  77. q15_t *b=this->getWriteBuffer();
  78. // We wait until enough samples are available.
  79. // In a future version we may experiment with sleeping the board
  80. while(samplesRead<outputSize)
  81. {
  82. #if defined(DEBUG)
  83. Serial.print("Sample reads ");
  84. Serial.println(samplesRead);
  85. #endif
  86. };
  87. #if defined(DEBUG)
  88. Serial.println("Received");
  89. #endif
  90. // We get the samples and update the
  91. // sampleBuffer.
  92. // Since this buffer is also accessed by the IRQ, we need to disable it
  93. NVIC_DisableIRQ(PDM_IRQn);
  94. memcpy(b,sampleBuffer,sizeof(q15_t)*outputSize);
  95. if ((samplesRead-outputSize) > 0)
  96. {
  97. memmove(sampleBuffer,sampleBuffer+outputSize,sizeof(q15_t)*(samplesRead-outputSize));
  98. }
  99. samplesRead = samplesRead - outputSize;
  100. NVIC_EnableIRQ(PDM_IRQn);
  101. #if defined(DEBUG)
  102. Serial.print("After read : Sample reads ");
  103. Serial.println(samplesRead);
  104. #endif
  105. return(0);
  106. };
  107. };
  108. template<typename IN, int inputSize,typename OUT,int outputSize> class FIR;
  109. // FIR node
  110. template<int inputSize>
  111. class FIR<q15_t,inputSize,q15_t,inputSize>: public GenericNode<q15_t,inputSize,q15_t,inputSize>
  112. {
  113. public:
  114. FIR(FIFOBase<q15_t> &src,FIFOBase<q15_t> &dst):GenericNode<q15_t,inputSize,q15_t,inputSize>(src,dst){
  115. int blockSize=inputSize;
  116. int numTaps=10;
  117. int stateLength = numTaps + blockSize - 1;
  118. state=(q15_t*)malloc(stateLength * sizeof(q15_t*));
  119. };
  120. int run(){
  121. #if defined(DEBUG)
  122. Serial.println("==== FIR");
  123. #endif
  124. q15_t *a=this->getReadBuffer();
  125. q15_t *b=this->getWriteBuffer();
  126. int blockSize=inputSize;
  127. int stateLength = NUMTAPS + blockSize - 1;
  128. arm_status status=arm_fir_init_q15(&(this->firq15),NUMTAPS,fir_coefs,state,blockSize);
  129. arm_fir_q15(&(this->firq15),a,b,blockSize);
  130. return(0);
  131. };
  132. arm_fir_instance_q15 firq15;
  133. q15_t *state;
  134. };
  135. /* Not available in the older CMSIS-DSP version provided with Arduino.
  136. So we copy the definition here */
  137. arm_status arm_divide_q15(q15_t numerator,
  138. q15_t denominator,
  139. q15_t *quotient,
  140. int16_t *shift)
  141. {
  142. int16_t sign=0;
  143. q31_t temp;
  144. int16_t shiftForNormalizing;
  145. *shift = 0;
  146. sign = (numerator>>15) ^ (denominator>>15);
  147. if (denominator == 0)
  148. {
  149. if (sign)
  150. {
  151. *quotient = 0x8000;
  152. }
  153. else
  154. {
  155. *quotient = 0x7FFF;
  156. }
  157. return(ARM_MATH_NANINF);
  158. }
  159. numerator = abs(numerator);
  160. denominator = abs(denominator);
  161. temp = ((q31_t)numerator << 15) / ((q31_t)denominator);
  162. shiftForNormalizing= 17 - __CLZ(temp);
  163. if (shiftForNormalizing > 0)
  164. {
  165. *shift = shiftForNormalizing;
  166. temp = temp >> shiftForNormalizing;
  167. }
  168. if (sign)
  169. {
  170. temp = -temp;
  171. }
  172. *quotient=temp;
  173. return(ARM_MATH_SUCCESS);
  174. }
  175. // We similar to the Python implementation
  176. q15_t dsp_zcr_q15(q15_t *w,int blockSize)
  177. {
  178. q15_t m;
  179. arm_mean_q15(w,blockSize,&m);
  180. // Negate can saturate so we use CMSIS-DSP function which is working on array (and we have a scalar)
  181. arm_negate_q15(&m,&m,1);
  182. arm_offset_q15(w,m,w,blockSize);
  183. int k=0;
  184. for(int i=0;i<blockSize-1;i++)
  185. {
  186. int f = w[i];
  187. int g = w[i+1];
  188. if ((((f>0) && (g<0)) || ((f<0) && (g>0))) && g>f)
  189. {
  190. k++;
  191. }
  192. }
  193. // k < len(f) so shift should be 0 except when k == len(f)
  194. // When k==len(f) normally quotient is 0x4000 and shift 1 and we convert
  195. // this to 0x7FFF
  196. q15_t quotient;
  197. int16_t shift;
  198. arm_status status=arm_divide_q15(k,blockSize-1,&quotient,&shift);
  199. if (shift==1)
  200. {
  201. arm_shift_q15(&quotient,shift,&quotient,1);
  202. return(quotient);
  203. }
  204. else
  205. {
  206. return(quotient);
  207. }
  208. };
  209. template<typename IN, int inputSize,typename OUT,int outputSize> class Feature;
  210. template<int inputSize>
  211. class Feature<q15_t,inputSize,q15_t,1>: public GenericNode<q15_t,inputSize,q15_t,1>
  212. {
  213. public:
  214. Feature(FIFOBase<q15_t> &src,FIFOBase<q15_t> &dst,const q15_t *window):
  215. GenericNode<q15_t,inputSize,q15_t,1>(src,dst),mWindow(window){
  216. };
  217. int run(){
  218. #if defined(DEBUG)
  219. Serial.println("==== Feature");
  220. #endif
  221. q15_t *a=this->getReadBuffer();
  222. q15_t *b=this->getWriteBuffer();
  223. arm_mult_q15(a,this->mWindow,a,inputSize);
  224. b[0] = dsp_zcr_q15(a,inputSize);
  225. return(0);
  226. };
  227. const q15_t* mWindow;
  228. };
  229. template<typename IN, int inputSize,typename OUT,int outputSize> class KWS;
  230. template<int inputSize>
  231. class KWS<q15_t,inputSize,q15_t,1>: public GenericNode<q15_t,inputSize,q15_t,1>
  232. {
  233. public:
  234. KWS(FIFOBase<q15_t> &src,FIFOBase<q15_t> &dst,
  235. const q15_t* coef_q15,
  236. const int coef_shift,
  237. const q15_t intercept_q15,
  238. const int intercept_shift):GenericNode<q15_t,inputSize,q15_t,1>(src,dst),
  239. mCoef_q15(coef_q15),
  240. mCoef_shift(coef_shift),
  241. mIntercept_q15(intercept_q15),
  242. mIntercept_shift(intercept_shift)
  243. {
  244. };
  245. int run(){
  246. #if defined(DEBUG)
  247. Serial.println("==== KWS");
  248. #endif
  249. q15_t *a=this->getReadBuffer();
  250. q15_t *b=this->getWriteBuffer();
  251. q63_t res;
  252. arm_dot_prod_q15(this->mCoef_q15,a,inputSize,&res);
  253. q15_t scaled;
  254. arm_shift_q15(&(this->mIntercept_q15),this->mIntercept_shift-this->mCoef_shift,&scaled,1);
  255. // Because dot prod output is in Q34.30
  256. // and ret is on 64 bits
  257. q63_t scaled_Q30 = (q63_t)(scaled) << 15;
  258. res = res + scaled_Q30;
  259. if (res<0)
  260. {
  261. b[0]=-1;
  262. }
  263. else
  264. {
  265. b[0]=0;
  266. }
  267. return(0);
  268. };
  269. const q15_t* mCoef_q15;
  270. const int mCoef_shift;
  271. const q15_t mIntercept_q15;
  272. const int mIntercept_shift;
  273. };
  274. #endif