arm_relu_q7.c 2.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109
  1. /*
  2. * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
  3. *
  4. * SPDX-License-Identifier: Apache-2.0
  5. *
  6. * Licensed under the Apache License, Version 2.0 (the License); you may
  7. * not use this file except in compliance with the License.
  8. * You may obtain a copy of the License at
  9. *
  10. * www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  14. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. /* ----------------------------------------------------------------------
  19. * Project: CMSIS NN Library
  20. * Title: arm_relu_q7.c
  21. * Description: Q7 version of ReLU
  22. *
  23. * $Date: February 27, 2020
  24. * $Revision: V.1.0.1
  25. *
  26. * Target Processor: Cortex-M cores
  27. *
  28. * -------------------------------------------------------------------- */
  29. #include "arm_math.h"
  30. #include "arm_nnfunctions.h"
  31. /**
  32. * @ingroup groupNN
  33. */
  34. /**
  35. * @addtogroup Acti
  36. * @{
  37. */
  38. /**
  39. * @brief Q7 RELU function
  40. * @param[in,out] data pointer to input
  41. * @param[in] size number of elements
  42. *
  43. * @details
  44. *
  45. * Optimized relu with QSUB instructions.
  46. *
  47. */
  48. void arm_relu_q7(q7_t *data, uint16_t size)
  49. {
  50. #if defined(ARM_MATH_DSP)
  51. /* Run the following code for M cores with DSP extension */
  52. uint16_t i = size >> 2;
  53. q7_t *input = data;
  54. q7_t *output = data;
  55. q31_t in;
  56. q31_t buf;
  57. q31_t mask;
  58. while (i)
  59. {
  60. in = read_q7x4_ia(&input);
  61. /* extract the first bit */
  62. buf = __ROR(in & 0x80808080, 7);
  63. /* if MSB=1, mask will be 0xFF, 0x0 otherwise */
  64. mask = __QSUB8(0x00000000, buf);
  65. write_q7x4_ia(&output, in & (~mask));
  66. i--;
  67. }
  68. i = size & 0x3;
  69. while (i)
  70. {
  71. if (*input < 0)
  72. {
  73. *input = 0;
  74. }
  75. input++;
  76. i--;
  77. }
  78. #else
  79. /* Run the following code as reference implementation for cores without DSP extension */
  80. uint16_t i;
  81. for (i = 0; i < size; i++)
  82. {
  83. if (data[i] < 0)
  84. data[i] = 0;
  85. }
  86. #endif
  87. }
  88. /**
  89. * @} end of Acti group
  90. */