/* * Copyright (c) 2019 Nuclei Limited. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the License); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an AS IS BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef __CORE_FEATURE_DSP__ #define __CORE_FEATURE_DSP__ /*! * @file core_feature_dsp.h * @brief DSP feature API header file for Nuclei N/NX Core */ /* * DSP Feature Configuration Macro: * 1. __DSP_PRESENT: Define whether Digital Signal Processing Unit(DSP) is present or not * * 0: Not present * * 1: Present */ #ifdef __cplusplus extern "C" { #endif #include "core_feature_base.h" #if defined(__DSP_PRESENT) && (__DSP_PRESENT == 1) #if defined(__INC_INTRINSIC_API) && (__INC_INTRINSIC_API == 1) #if defined(__zcc__) #include #else #if !defined(__ICCRISCV__) && !defined(__llvm__) #include #endif #endif #endif #ifndef __ICCRISCV__ /* ########################### CPU SIMD DSP Intrinsic Functions ########################### */ /** * \defgroup NMSIS_Core_DSP_Intrinsic Intrinsic Functions for SIMD Instructions * \ingroup NMSIS_Core * \brief Functions that generate RISC-V DSP SIMD instructions. * \details * * The following functions generate specified RISC-V SIMD instructions that cannot be directly accessed by compiler. 
* * **DSP ISA Extension Instruction Summary**
 *   + **Shorthand Definitions**
 *     - r.H == r.H1: r[31:16], r.L == r.H0: r[15:0]
 *     - r.B3: r[31:24], r.B2: r[23:16], r.B1: r[15:8], r.B0: r[7:0]
 *     - r.B[x]: r[(x*8+7):(x*8+0)]
 *     - r.H[x]: r[(x*16+15):(x*16+0)]
 *     - r.W[x]: r[(x*32+31):(x*32+0)]
 *     - r[xU]: the upper 32-bit of a 64-bit number; xU represents the GPR number that contains this upper part 32-bit value.
 *     - r[xL]: the lower 32-bit of a 64-bit number; xL represents the GPR number that contains this lower part 32-bit value.
 *     - r[xU].r[xL]: a 64-bit number that is formed from a pair of GPRs.
 *     - s>>: signed arithmetic right shift
 *     - u>>: unsigned logical right shift
 *     - SAT.Qn(): Saturate to the range of [-2^n, 2^n-1], if saturation happens, set PSW.OV.
 *     - SAT.Um(): Saturate to the range of [0, 2^m-1], if saturation happens, set PSW.OV.
 *     - RUND(): Indicate `rounding`, i.e., add 1 to the most significant discarded bit for right shift or MSW-type multiplication instructions.
 *     - Sign or Zero Extending functions:
 *       - SEm(data): Sign-Extend data to m-bit.
 *       - ZEm(data): Zero-Extend data to m-bit.
 *     - ABS(x): Calculate the absolute value of `x`.
 *     - CONCAT(x,y): Concatenate `x` and `y` to form a value.
 *     - u<: Unsigned less than comparison.
 *     - u<=: Unsigned less than or equal comparison.
 *     - u>: Unsigned greater than comparison.
 *     - s*: Signed multiplication.
 *     - u*: Unsigned multiplication.
* * @{ */ /** @} */ /* End of Doxygen Group NMSIS_Core_DSP_Intrinsic */ /** * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS SIMD Data Processing Instructions * \ingroup NMSIS_Core_DSP_Intrinsic * \brief SIMD Data Processing Instructions * \details */ /** * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB SIMD 16-bit Add/Subtract Instructions * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS * \brief SIMD 16-bit Add/Subtract Instructions * \details * Based on the combination of the types of the two 16-bit arithmetic operations, the SIMD 16-bit * add/subtract instructions can be classified into 6 main categories: Addition (two 16-bit addition), * Subtraction (two 16-bit subtraction), Crossed Add & Sub (one addition and one subtraction), and * Crossed Sub & Add (one subtraction and one addition), Straight Add & Sub (one addition and one * subtraction), and Straight Sub & Add (one subtraction and one addition). * Based on the way of how an overflow condition is handled, the SIMD 16-bit add/subtract * instructions can be classified into 5 groups: Wrap-around (dropping overflow), Signed Halving * (keeping overflow by dropping 1 LSB bit), Unsigned Halving, Signed Saturation (clipping overflow), * and Unsigned Saturation. * Together, there are 30 SIMD 16-bit add/subtract instructions. */ /** * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB SIMD 8-bit Addition & Subtraction Instructions * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS * \brief SIMD 8-bit Addition & Subtraction Instructions * \details * Based on the types of the four 8-bit arithmetic operations, the SIMD 8-bit add/subtract instructions * can be classified into 2 main categories: Addition (four 8-bit addition), and Subtraction (four 8-bit * subtraction). 
* Based on the way of how an overflow condition is handled for signed or unsigned operation, the
 * SIMD 8-bit add/subtract instructions can be classified into 5 groups: Wrap-around (dropping
 * overflow), Signed Halving (keeping overflow by dropping 1 LSB bit), Unsigned Halving, Signed
 * Saturation (clipping overflow), and Unsigned Saturation.
 * Together, there are 10 SIMD 8-bit add/subtract instructions.
 */
/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT SIMD 16-bit Shift Instructions
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
 * \brief SIMD 16-bit Shift Instructions
 * \details
 * There are 14 SIMD 16-bit shift instructions.
 */
/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT SIMD 8-bit Shift Instructions
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
 * \brief SIMD 8-bit Shift Instructions
 * \details
 * There are 14 SIMD 8-bit shift instructions.
 */
/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP SIMD 16-bit Compare Instructions
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
 * \brief SIMD 16-bit Compare Instructions
 * \details
 * There are 5 SIMD 16-bit Compare instructions.
 */
/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP SIMD 8-bit Compare Instructions
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
 * \brief SIMD 8-bit Compare Instructions
 * \details
 * There are 5 SIMD 8-bit Compare instructions.
 */
/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY SIMD 16-bit Multiply Instructions
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
 * \brief SIMD 16-bit Multiply Instructions
 * \details
 * There are 6 SIMD 16-bit Multiply instructions.
 */
/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY SIMD 8-bit Multiply Instructions
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
 * \brief SIMD 8-bit Multiply Instructions
 * \details
 * There are 6 SIMD 8-bit Multiply instructions.
*/ /** * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC SIMD 16-bit Miscellaneous Instructions * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS * \brief SIMD 16-bit Miscellaneous Instructions * \details * there are 10 SIMD 16-bit Misc instructions. */ /** * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC SIMD 8-bit Miscellaneous Instructions * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS * \brief SIMD 8-bit Miscellaneous Instructions * \details * there are 10 SIMD 8-bit Miscellaneous instructions. */ /** * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK SIMD 8-bit Unpacking Instructions * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS * \brief SIMD 8-bit Unpacking Instructions * \details * there are 8 SIMD 8-bit Unpacking instructions. */ /** * \defgroup NMSIS_Core_DSP_Intrinsic_NON_SIMD Non-SIMD Instructions * \ingroup NMSIS_Core_DSP_Intrinsic * \brief Non-SIMD Instructions * \details */ /** * \defgroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU Non-SIMD Q15 saturation ALU Instructions * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD * \brief Non-SIMD Q15 saturation ALU Instructions * \details * there are 7 Non-SIMD Q15 saturation ALU Instructions */ /** * \defgroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU Non-SIMD Q31 saturation ALU Instructions * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD * \brief Non-SIMD Q31 saturation ALU Instructions * \details * there are Non-SIMD Q31 saturation ALU Instructions */ /** * \defgroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION 32-bit Computation Instructions * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD * \brief 32-bit Computation Instructions * \details * there are 8 32-bit Computation Instructions */ /** * \defgroup NMSIS_Core_DSP_Intrinsic_OV_FLAG_SC OV (Overflow) flag Set/Clear Instructions * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD * \brief OV (Overflow) flag Set/Clear Instructions * \details * The following table lists the user instructions related to Overflow (OV) flag manipulation. 
there are 2 OV (Overflow) flag Set/Clear Instructions */ /** * \defgroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC Non-SIMD Miscellaneous Instructions * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD * \brief Non-SIMD Miscellaneous Instructions * \details * There are 13 Miscellaneous Instructions here. */ /** * \defgroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS Partial-SIMD Data Processing Instructions * \ingroup NMSIS_Core_DSP_Intrinsic * \brief Partial-SIMD Data Processing Instructions * \details */ /** * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_PACK SIMD 16-bit Packing Instructions * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS * \brief SIMD 16-bit Packing Instructions * \details * there are 4 SIMD16-bit Packing Instructions. */ /** * \defgroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC Signed MSW 32x32 Multiply and Add Instructions * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS * \brief Signed MSW 32x32 Multiply and Add Instructions * \details * there are 8 Signed MSW 32x32 Multiply and Add Instructions */ /** * \defgroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC Signed MSW 32x16 Multiply and Add Instructions * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS * \brief Signed MSW 32x16 Multiply and Add Instructions * \details * there are 15 Signed MSW 32x16 Multiply and Add Instructions */ /** * \defgroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB Signed 16-bit Multiply 32-bit Add/Subtract Instructions * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS * \brief Signed 16-bit Multiply 32-bit Add/Subtract Instructions * \details * there are 18 Signed 16-bit Multiply 32-bit Add/Subtract Instructions */ /** * \defgroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB Signed 16-bit Multiply 64-bit Add/Subtract Instructions * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS * \brief Signed 16-bit Multiply 64-bit Add/Subtract Instructions * \details * there is Signed 16-bit Multiply 64-bit 
Add/Subtract Instructions */ /** * \defgroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC Partial-SIMD Miscellaneous Instructions * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS * \brief Partial-SIMD Miscellaneous Instructions * \details * there are 7 Partial-SIMD Miscellaneous Instructions */ /** * \defgroup NMSIS_Core_DSP_Intrinsic_8B_MULT_32B_ADD 8-bit Multiply with 32-bit Add Instructions * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS * \brief 8-bit Multiply with 32-bit Add Instructions * \details * there are 3 8-bit Multiply with 32-bit Add Instructions */ /** * \defgroup NMSIS_Core_DSP_Intrinsic_64B_PROFILE 64-bit Profile Instructions * \ingroup NMSIS_Core_DSP_Intrinsic * \brief 64-bit Profile Instructions * \details */ /** * \defgroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB 64-bit Addition & Subtraction Instructions * \ingroup NMSIS_Core_DSP_Intrinsic_64B_PROFILE * \brief 64-bit Addition & Subtraction Instructions * \details * there are 10 64-bit Addition & Subtraction Instructions. 
*/
/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB 32-bit Multiply with 64-bit Add/Subtract Instructions
 * \ingroup NMSIS_Core_DSP_Intrinsic_64B_PROFILE
 * \brief 32-bit Multiply with 64-bit Add/Subtract Instructions
 * \details
 * There are 32-bit Multiply 64-bit Add/Subtract Instructions
 */
/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB Signed 16-bit Multiply with 64-bit Add/Subtract Instructions
 * \ingroup NMSIS_Core_DSP_Intrinsic_64B_PROFILE
 * \brief Signed 16-bit Multiply with 64-bit Add/Subtract Instructions
 * \details
 * There are 10 Signed 16-bit Multiply with 64-bit Add/Subtract Instructions
 */
/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY RV64 Only Instructions
 * \ingroup NMSIS_Core_DSP_Intrinsic
 * \brief RV64 Only Instructions
 * \details
 */
/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB (RV64 Only) SIMD 32-bit Add/Subtract Instructions
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY
 * \brief (RV64 Only) SIMD 32-bit Add/Subtract Instructions
 * \details
 * The following tables list instructions that are only present in RV64.
 * There are 30 SIMD 32-bit addition or subtraction instructions.
*/
/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT (RV64 Only) SIMD 32-bit Shift Instructions
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY
 * \brief (RV64 Only) SIMD 32-bit Shift Instructions
 * \details
 * There are 14 (RV64 Only) SIMD 32-bit Shift Instructions
 */
/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC (RV64 Only) SIMD 32-bit Miscellaneous Instructions
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY
 * \brief (RV64 Only) SIMD 32-bit Miscellaneous Instructions
 * \details
 * There are 5 (RV64 Only) SIMD 32-bit Miscellaneous Instructions
 */
/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT (RV64 Only) SIMD Q15 Saturating Multiply Instructions
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY
 * \brief (RV64 Only) SIMD Q15 Saturating Multiply Instructions
 * \details
 * There are 9 (RV64 Only) SIMD Q15 saturating Multiply Instructions
 */
/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT (RV64 Only) 32-bit Multiply Instructions
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY
 * \brief (RV64 Only) 32-bit Multiply Instructions
 * \details
 * There are 3 (RV64 Only) 32-bit Multiply Instructions
 */
/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT_ADD (RV64 Only) 32-bit Multiply & Add Instructions
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY
 * \brief (RV64 Only) 32-bit Multiply & Add Instructions
 * \details
 * There are 3 (RV64 Only) 32-bit Multiply & Add Instructions
 */
/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC (RV64 Only) 32-bit Parallel Multiply & Add Instructions
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY
 * \brief (RV64 Only) 32-bit Parallel Multiply & Add Instructions
 * \details
 * There are 12 (RV64 Only) 32-bit Parallel Multiply & Add Instructions
 */
/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_NON_SIMD_32B_SHIFT (RV64 Only) Non-SIMD 32-bit Shift Instructions
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY
 * \brief (RV64 Only) Non-SIMD 32-bit Shift Instructions
 * \details
 * there are 1 (RV64 Only) Non-SIMD
32-bit Shift Instructions
 */
/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PACK 32-bit Packing Instructions
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY
 * \brief 32-bit Packing Instructions
 * \details
 * There are four 32-bit packing instructions here
 */

/* ===== Inline Function Start for 3.1. ADD8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
 * \brief ADD8 (SIMD 8-bit Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * ADD8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit integer element additions simultaneously.
 *
 * **Description**:\n
 * This instruction adds the 8-bit integer elements in Rs1 with the 8-bit integer elements
 * in Rs2, and then writes the 8-bit element results to Rd.
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned addition.
 *
 * **Operations**:\n
 * ~~~
 * Rd.B[x] = Rs1.B[x] + Rs2.B[x];
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long value supplied as Rs1
 * \param [in]  b    unsigned long value supplied as Rs2
 * \return unsigned long value read back from Rd
 */
__STATIC_FORCEINLINE unsigned long __RV_ADD8(unsigned long a, unsigned long b)
{
    unsigned long rd;

    __ASM volatile("add8 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.1. ADD8 ===== */

/* ===== Inline Function Start for 3.2. ADD16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief ADD16 (SIMD 16-bit Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * ADD16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit integer element additions simultaneously.
 *
 * **Description**:\n
 * This instruction adds the 16-bit integer elements in Rs1 with the 16-bit integer
 * elements in Rs2, and then writes the 16-bit element results to Rd.
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned addition.
* * **Operations**:\n * ~~~ * Rd.H[x] = Rs1.H[x] + Rs2.H[x]; * for RV32: x=1...0, * for RV64: x=3...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_ADD16(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("add16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.2. ADD16 ===== */ /* ===== Inline Function Start for 3.3. ADD64 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB * \brief ADD64 (64-bit Addition) * \details * **Type**: 64-bit Profile * * **Syntax**:\n * ~~~ * ADD64 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Add two 64-bit signed or unsigned integers. * * **RV32 Description**:\n * This instruction adds the 64-bit integer of an even/odd pair of registers specified * by Rs1(4,1) with the 64-bit integer of an even/odd pair of registers specified by Rs2(4,1), and then * writes the 64-bit result to an even/odd pair of registers specified by Rd(4,1). * Rx(4,1), i.e., value d, determines the even/odd pair group of two registers. Specifically, the register * pair includes register 2d and 2d+1. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register * of the pair contains the low 32-bit of the result. * * **RV64 Description**:\n * This instruction has the same behavior as the ADD instruction in RV64I. * * **Note**:\n * This instruction can be used for either signed or unsigned addition. 
* * **Operations**:\n * ~~~ * RV32: * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1); * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1); * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1); * R[t_H].R[t_L] = R[a_H].R[a_L] + R[b_H].R[b_L]; * RV64: * Rd = Rs1 + Rs2; * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_ADD64(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("add64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.3. ADD64 ===== */ /* ===== Inline Function Start for 3.4. AVE ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC * \brief AVE (Average with Rounding) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * AVE Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Calculate the average of the contents of two general registers. * * **Description**:\n * This instruction calculates the average value of two signed integers stored in Rs1 and * Rs2, rounds up a half-integer result to the nearest integer, and writes the result to Rd. * * **Operations**:\n * ~~~ * Sum = CONCAT(Rs1[MSB],Rs1[MSB:0]) + CONCAT(Rs2[MSB],Rs2[MSB:0]) + 1; * Rd = Sum[(MSB+1):1]; * for RV32: MSB=31, * for RV64: MSB=63 * ~~~ * * \param [in] a long type of value stored in a * \param [in] b long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_AVE(long a, long b) { long result; __ASM volatile("ave %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.4. AVE ===== */ /* ===== Inline Function Start for 3.5. 
BITREV ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
 * \brief BITREV (Bit Reverse)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * BITREV Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Reverse the bit positions of the source operand within a specified width starting from bit
 * 0. The reversed width is a variable from a GPR.
 *
 * **Description**:\n
 * This instruction reverses the bit positions of the content of Rs1. The reversed bit width
 * is calculated as Rs2[4:0]+1 (RV32) or Rs2[5:0]+1 (RV64). The upper bits beyond the reversed width
 * are filled with zeros. After the bit reverse operation, the result is written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * msb = Rs2[4:0]; (for RV32)
 * msb = Rs2[5:0]; (for RV64)
 * rev[0:msb] = Rs1[msb:0];
 * Rd = ZE(rev[msb:0]);
 * ~~~
 *
 * \param [in]  a    unsigned long value supplied as Rs1 (the bits to reverse)
 * \param [in]  b    unsigned long value supplied as Rs2 (selects the reversed width)
 * \return unsigned long value read back from Rd
 */
__STATIC_FORCEINLINE unsigned long __RV_BITREV(unsigned long a, unsigned long b)
{
    unsigned long rd;

    __ASM volatile("bitrev %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.5. BITREV ===== */

/* ===== Inline Function Start for 3.6. BITREVI ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
 * \brief BITREVI (Bit Reverse Immediate)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * (RV32) BITREVI Rd, Rs1, imm[4:0]
 * (RV64) BITREVI Rd, Rs1, imm[5:0]
 * ~~~
 *
 * **Purpose**:\n
 * Reverse the bit positions of the source operand within a specified width starting from bit
 * 0. The reversed width is an immediate value.
 *
 * **Description**:\n
 * This instruction reverses the bit positions of the content of Rs1. The reversed bit width
 * is calculated as imm[4:0]+1 (RV32) or imm[5:0]+1 (RV64). The upper bits beyond the reversed width
 * are filled with zeros. After the bit reverse operation, the result is written to Rd.
* * **Operations**:\n * ~~~ * msb = imm[4:0]; (RV32) * msb = imm[5:0]; (RV64) * rev[0:msb] = Rs1[msb:0]; * Rd = ZE(rev[msb:0]); * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ #define __RV_BITREVI(a, b) \ ({ \ unsigned long result; \ unsigned long __a = (unsigned long)(a); \ __ASM volatile("bitrevi %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ result; \ }) /* ===== Inline Function End for 3.6. BITREVI ===== */ /* ===== Inline Function Start for 3.7. BPICK ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC * \brief BPICK (Bit-wise Pick) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * BPICK Rd, Rs1, Rs2, Rc * ~~~ * * **Purpose**:\n * Select from two source operands based on a bit mask in the third operand. * * **Description**:\n * This instruction selects individual bits from Rs1 or Rs2, based on the bit mask value in * Rc. If a bit in Rc is 1, the corresponding bit is from Rs1; otherwise, the corresponding bit is from Rs2. * The selection results are written to Rd. * * **Operations**:\n * ~~~ * Rd[x] = Rc[x]? Rs1[x] : Rs2[x]; * for RV32, x=31...0 * for RV64, x=63...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \param [in] c unsigned long type of value stored in c * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_BPICK(unsigned long a, unsigned long b, unsigned long c) { unsigned long result; __ASM volatile("bpick %0, %1, %2, %3" : "=r"(result) : "r"(a), "r"(b), "r"(c)); return result; } /* ===== Inline Function End for 3.7. BPICK ===== */ /* ===== Inline Function Start for 3.8. 
CLROV ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_OV_FLAG_SC * \brief CLROV (Clear OV flag) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * CLROV # pseudo mnemonic * ~~~ * * **Purpose**:\n * This pseudo instruction is an alias to `CSRRCI x0, ucode, 1` instruction. * * */ __STATIC_FORCEINLINE void __RV_CLROV(void) { __ASM volatile("clrov "); } /* ===== Inline Function End for 3.8. CLROV ===== */ /* ===== Inline Function Start for 3.9. CLRS8 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC * \brief CLRS8 (SIMD 8-bit Count Leading Redundant Sign) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * CLRS8 Rd, Rs1 * ~~~ * * **Purpose**:\n * Count the number of redundant sign bits of the 8-bit elements of a general register. * * **Description**:\n * Starting from the bits next to the sign bits of the 8-bit elements of Rs1, this instruction * counts the number of redundant sign bits and writes the result to the corresponding 8-bit elements * of Rd. * * **Operations**:\n * ~~~ * snum[x] = Rs1.B[x]; * cnt[x] = 0; * for (i = 6 to 0) { * if (snum[x](i) == snum[x](7)) { * cnt[x] = cnt[x] + 1; * } else { * break; * } * } * Rd.B[x] = cnt[x]; * for RV32: x=3...0 * for RV64: x=7...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_CLRS8(unsigned long a) { unsigned long result; __ASM volatile("clrs8 %0, %1" : "=r"(result) : "r"(a)); return result; } /* ===== Inline Function End for 3.9. CLRS8 ===== */ /* ===== Inline Function Start for 3.10. CLRS16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC * \brief CLRS16 (SIMD 16-bit Count Leading Redundant Sign) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * CLRS16 Rd, Rs1 * ~~~ * * **Purpose**:\n * Count the number of redundant sign bits of the 16-bit elements of a general register. 
* * **Description**:\n * Starting from the bits next to the sign bits of the 16-bit elements of Rs1, this * instruction counts the number of redundant sign bits and writes the result to the corresponding 16- * bit elements of Rd. * * **Operations**:\n * ~~~ * snum[x] = Rs1.H[x]; * cnt[x] = 0; * for (i = 14 to 0) { * if (snum[x](i) == snum[x](15)) { * cnt[x] = cnt[x] + 1; * } else { * break; * } * } * Rd.H[x] = cnt[x]; * for RV32: x=1...0 * for RV64: x=3...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_CLRS16(unsigned long a) { unsigned long result; __ASM volatile("clrs16 %0, %1" : "=r"(result) : "r"(a)); return result; } /* ===== Inline Function End for 3.10. CLRS16 ===== */ /* ===== Inline Function Start for 3.11. CLRS32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC * \brief CLRS32 (SIMD 32-bit Count Leading Redundant Sign) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * CLRS32 Rd, Rs1 * ~~~ * * **Purpose**:\n * Count the number of redundant sign bits of the 32-bit elements of a general register. * * **Description**:\n * Starting from the bits next to the sign bits of the 32-bit elements of Rs1, this * instruction counts the number of redundant sign bits and writes the result to the corresponding 32- * bit elements of Rd. * * **Operations**:\n * ~~~ * snum[x] = Rs1.W[x]; * cnt[x] = 0; * for (i = 30 to 0) { * if (snum[x](i) == snum[x](31)) { * cnt[x] = cnt[x] + 1; * } else { * break; * } * } * Rd.W[x] = cnt[x]; * for RV32: x=0 * for RV64: x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_CLRS32(unsigned long a) { unsigned long result; __ASM volatile("clrs32 %0, %1" : "=r"(result) : "r"(a)); return result; } /* ===== Inline Function End for 3.11. CLRS32 ===== */ /* ===== Inline Function Start for 3.12. 
CLO8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
 * \brief CLO8 (SIMD 8-bit Count Leading One)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * CLO8 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Count the number of leading one bits of the 8-bit elements of a general register.
 *
 * **Description**:\n
 * Starting from the most significant bits of the 8-bit elements of Rs1, this instruction
 * counts the number of leading one bits and writes the results to the corresponding 8-bit elements of
 * Rd.
 *
 * **Operations**:\n
 * ~~~
 * snum[x] = Rs1.B[x];
 * cnt[x] = 0;
 * for (i = 7 to 0) {
 *   if (snum[x](i) == 1) {
 *     cnt[x] = cnt[x] + 1;
 *   } else {
 *     break;
 *   }
 * }
 * Rd.B[x] = cnt[x];
 * for RV32: x=3...0
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long value supplied as Rs1
 * \return unsigned long value read back from Rd
 */
__STATIC_FORCEINLINE unsigned long __RV_CLO8(unsigned long a)
{
    unsigned long rd;

    __ASM volatile("clo8 %0, %1"
                   : "=r"(rd)
                   : "r"(a));
    return rd;
}
/* ===== Inline Function End for 3.12. CLO8 ===== */

/* ===== Inline Function Start for 3.13. CLO16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
 * \brief CLO16 (SIMD 16-bit Count Leading One)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * CLO16 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Count the number of leading one bits of the 16-bit elements of a general register.
 *
 * **Description**:\n
 * Starting from the most significant bits of the 16-bit elements of Rs1, this instruction
 * counts the number of leading one bits and writes the results to the corresponding 16-bit elements
 * of Rd.
* * **Operations**:\n * ~~~ * snum[x] = Rs1.H[x]; * cnt[x] = 0; * for (i = 15 to 0) { * if (snum[x](i) == 1) { * cnt[x] = cnt[x] + 1; * } else { * break; * } * } * Rd.H[x] = cnt[x]; * for RV32: x=1...0 * for RV64: x=3...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_CLO16(unsigned long a) { unsigned long result; __ASM volatile("clo16 %0, %1" : "=r"(result) : "r"(a)); return result; } /* ===== Inline Function End for 3.13. CLO16 ===== */ /* ===== Inline Function Start for 3.14. CLO32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC * \brief CLO32 (SIMD 32-bit Count Leading One) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * CLO32 Rd, Rs1 * ~~~ * * **Purpose**:\n * Count the number of leading one bits of the 32-bit elements of a general register. * * **Description**:\n * Starting from the most significant bits of the 32-bit elements of Rs1, this instruction * counts the number of leading one bits and writes the results to the corresponding 32-bit elements * of Rd. * * **Operations**:\n * ~~~ * snum[x] = Rs1.W[x]; * cnt[x] = 0; * for (i = 31 to 0) { * if (snum[x](i) == 1) { * cnt[x] = cnt[x] + 1; * } else { * break; * } * } * Rd.W[x] = cnt[x]; * for RV32: x=0 * for RV64: x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_CLO32(unsigned long a) { unsigned long result; __ASM volatile("clo32 %0, %1" : "=r"(result) : "r"(a)); return result; } /* ===== Inline Function End for 3.14. CLO32 ===== */ /* ===== Inline Function Start for 3.15. CLZ8 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC * \brief CLZ8 (SIMD 8-bit Count Leading Zero) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * CLZ8 Rd, Rs1 * ~~~ * * **Purpose**:\n * Count the number of leading zero bits of the 8-bit elements of a general register. 
* * **Description**:\n * Starting from the most significant bits of the 8-bit elements of Rs1, this instruction * counts the number of leading zero bits and writes the results to the corresponding 8-bit elements of * Rd. * * **Operations**:\n * ~~~ * snum[x] = Rs1.B[x]; * cnt[x] = 0; * for (i = 7 to 0) { * if (snum[x](i) == 0) { * cnt[x] = cnt[x] + 1; * } else { * break; * } * } * Rd.B[x] = cnt[x]; * for RV32: x=3...0 * for RV64: x=7...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_CLZ8(unsigned long a) { unsigned long result; __ASM volatile("clz8 %0, %1" : "=r"(result) : "r"(a)); return result; } /* ===== Inline Function End for 3.15. CLZ8 ===== */ /* ===== Inline Function Start for 3.16. CLZ16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC * \brief CLZ16 (SIMD 16-bit Count Leading Zero) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * CLZ16 Rd, Rs1 * ~~~ * * **Purpose**:\n * Count the number of leading zero bits of the 16-bit elements of a general register. * * **Description**:\n * Starting from the most significant bits of the 16-bit elements of Rs1, this instruction * counts the number of leading zero bits and writes the results to the corresponding 16-bit elements * of Rd. * * **Operations**:\n * ~~~ * snum[x] = Rs1.H[x]; * cnt[x] = 0; * for (i = 15 to 0) { * if (snum[x](i) == 0) { * cnt[x] = cnt[x] + 1; * } else { * break; * } * } * Rd.H[x] = cnt[x]; * for RV32: x=1...0 * for RV64: x=3...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_CLZ16(unsigned long a) { unsigned long result; __ASM volatile("clz16 %0, %1" : "=r"(result) : "r"(a)); return result; } /* ===== Inline Function End for 3.16. CLZ16 ===== */ /* ===== Inline Function Start for 3.17. 
CLZ32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
 * \brief CLZ32 (SIMD 32-bit Count Leading Zero)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * CLZ32 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Count the number of leading zero bits of the 32-bit elements of a general register.
 *
 * **Description**:\n
 * Starting from the most significant bits of the 32-bit elements of Rs1, this instruction
 * counts the number of leading zero bits and writes the results to the corresponding 32-bit elements
 * of Rd.
 *
 * **Operations**:\n
 * ~~~
 * snum[x] = Rs1.W[x];
 * cnt[x] = 0;
 * for (i = 31 to 0) {
 *   if (snum[x](i) == 0) {
 *     cnt[x] = cnt[x] + 1;
 *   } else {
 *     break;
 *   }
 * }
 * Rd.W[x] = cnt[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_CLZ32(unsigned long a)
{
    unsigned long counts;

    __ASM volatile("clz32 %0, %1"
                   : "=r"(counts)
                   : "r"(a));
    return counts;
}
/* ===== Inline Function End for 3.17. CLZ32 ===== */

/* ===== Inline Function Start for 3.18. CMPEQ8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP
 * \brief CMPEQ8 (SIMD 8-bit Integer Compare Equal)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * CMPEQ8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit integer elements equal comparisons simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 8-bit integer elements in Rs1 with the 8-bit integer
 * elements in Rs2 to see if they are equal. If they are equal, the result is 0xFF; otherwise, the result is
 * 0x0. The 8-bit element comparison results are written to Rd.
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned numbers.
 *
 * **Operations**:\n
 * ~~~
 * Rd.B[x] = (Rs1.B[x] == Rs2.B[x])?
0xff : 0x0; * for RV32: x=3...0, * for RV64: x=7...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_CMPEQ8(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("cmpeq8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.18. CMPEQ8 ===== */ /* ===== Inline Function Start for 3.19. CMPEQ16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP * \brief CMPEQ16 (SIMD 16-bit Integer Compare Equal) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * CMPEQ16 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 16-bit integer elements equal comparisons simultaneously. * * **Description**:\n * This instruction compares the 16-bit integer elements in Rs1 with the 16-bit integer * elements in Rs2 to see if they are equal. If they are equal, the result is 0xFFFF; otherwise, the result * is 0x0. The 16-bit element comparison results are written to Rt. * * **Note**:\n * This instruction can be used for either signed or unsigned numbers. * * **Operations**:\n * ~~~ * Rd.H[x] = (Rs1.H[x] == Rs2.H[x])? 0xffff : 0x0; * for RV32: x=1...0, * for RV64: x=3...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_CMPEQ16(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("cmpeq16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.19. CMPEQ16 ===== */ /* ===== Inline Function Start for 3.20. 
CRAS16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief CRAS16 (SIMD 16-bit Cross Addition & Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * CRAS16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit integer element addition and 16-bit integer element subtraction in a 32-bit
 * chunk simultaneously. Operands are from crossed positions in 32-bit chunks.
 *
 * **Description**:\n
 * This instruction adds the 16-bit integer element in [31:16] of 32-bit chunks in Rs1 with
 * the 16-bit integer element in [15:0] of 32-bit chunks in Rs2, and writes the result to [31:16] of 32-bit
 * chunks in Rd; at the same time, it subtracts the 16-bit integer element in [31:16] of 32-bit chunks in
 * Rs2 from the 16-bit integer element in [15:0] of 32-bit chunks in Rs1, and writes the result to [15:0] of 32-
 * bit chunks in Rd.
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned operations.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:16] = Rs1.W[x][31:16] + Rs2.W[x][15:0];
 * Rd.W[x][15:0] = Rs1.W[x][15:0] - Rs2.W[x][31:16];
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_CRAS16(unsigned long a, unsigned long b)
{
    unsigned long packed;

    __ASM volatile("cras16 %0, %1, %2"
                   : "=r"(packed)
                   : "r"(a), "r"(b));
    return packed;
}
/* ===== Inline Function End for 3.20. CRAS16 ===== */

/* ===== Inline Function Start for 3.21. CRSA16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief CRSA16 (SIMD 16-bit Cross Subtraction & Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * CRSA16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit integer element subtraction and 16-bit integer element addition in a 32-bit
 * chunk simultaneously. Operands are from crossed positions in 32-bit chunks.
* * **Description**:\n * This instruction subtracts the 16-bit integer element in [15:0] of 32-bit chunks in Rs2 * from the 16-bit integer element in [31:16] of 32-bit chunks in Rs1, and writes the result to [31:16] of * 32-bit chunks in Rd; at the same time, it adds the 16-bit integer element in [31:16] of 32-bit chunks * in Rs2 with the 16-bit integer element in [15:0] of 32-bit chunks in Rs1, and writes the result to * [15:0] of 32-bit chunks in Rd. * * **Note**:\n * This instruction can be used for either signed or unsigned operations. * * **Operations**:\n * ~~~ * Rd.W[x][31:16] = Rs1.W[x][31:16] - Rs2.W[x][15:0]; * Rd.W[x][15:0] = Rs1.W[x][15:0] + Rs2.W[x][31:16]; * for RV32, x=0 * for RV64, x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_CRSA16(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("crsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.21. CRSA16 ===== */ /* ===== Inline Function Start for 3.22. INSB ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC * \brief INSB (Insert Byte) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * (RV32) INSB Rd, Rs1, imm[1:0] * (RV64) INSB Rd, Rs1, imm[2:0] * ~~~ * * **Purpose**:\n * Insert byte 0 of a 32-bit or 64-bit register into one of the byte elements of another register. * * **Description**:\n * This instruction inserts byte 0 of Rs1 into byte `imm[1:0]` (RV32) or `imm[2:0]` (RV64) * of Rd. 
* * **Operations**:\n * ~~~ * bpos = imm[1:0]; (RV32) * bpos = imm[2:0]; (RV64) * Rd.B[bpos] = Rs1.B[0] * ~~~ * * \param [in] t unsigned long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ #define __RV_INSB(t, a, b) \ ({ \ unsigned long __t = (unsigned long)(t); \ unsigned long __a = (unsigned long)(a); \ __ASM volatile("insb %0, %1, %2" : "+r"(__t) : "r"(__a), "K"(b)); \ __t; \ }) /* ===== Inline Function End for 3.22. INSB ===== */ /* ===== Inline Function Start for 3.23. KABS8 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC * \brief KABS8 (SIMD 8-bit Saturating Absolute) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * KABS8 Rd, Rs1 * ~~~ * * **Purpose**:\n * Get the absolute value of 8-bit signed integer elements simultaneously. * * **Description**:\n * This instruction calculates the absolute value of 8-bit signed integer elements stored * in Rs1 and writes the element results to Rd. If the input number is 0x80, this instruction generates * 0x7f as the output and sets the OV bit to 1. * * **Operations**:\n * ~~~ * src = Rs1.B[x]; * if (src == 0x80) { * src = 0x7f; * OV = 1; * } else if (src[7] == 1) * src = -src; * } * Rd.B[x] = src; * for RV32: x=3...0, * for RV64: x=7...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_KABS8(unsigned long a) { unsigned long result; __ASM volatile("kabs8 %0, %1" : "=r"(result) : "r"(a)); return result; } /* ===== Inline Function End for 3.23. KABS8 ===== */ /* ===== Inline Function Start for 3.24. 
KABS16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
 * \brief KABS16 (SIMD 16-bit Saturating Absolute)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KABS16 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Get the absolute value of 16-bit signed integer elements simultaneously.
 *
 * **Description**:\n
 * This instruction calculates the absolute value of 16-bit signed integer elements stored
 * in Rs1 and writes the element results to Rd. If the input number is 0x8000, this instruction
 * generates 0x7fff as the output and sets the OV bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * src = Rs1.H[x];
 * if (src == 0x8000) {
 *   src = 0x7fff;
 *   OV = 1;
 * } else if (src[15] == 1) {
 *   src = -src;
 * }
 * Rd.H[x] = src;
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KABS16(unsigned long a)
{
    unsigned long magnitudes;

    __ASM volatile("kabs16 %0, %1"
                   : "=r"(magnitudes)
                   : "r"(a));
    return magnitudes;
}
/* ===== Inline Function End for 3.24. KABS16 ===== */

/* ===== Inline Function Start for 3.25. KABSW ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
 * \brief KABSW (Scalar 32-bit Absolute Value with Saturation)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * KABSW Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Get the absolute value of a signed 32-bit integer in a general register.
 *
 * **Description**:\n
 * This instruction calculates the absolute value of a signed 32-bit integer stored in Rs1.
 * The result is sign-extended (for RV64) and written to Rd. This instruction with the minimum
 * negative integer input of 0x80000000 will produce a saturated output of maximum positive integer
 * of 0x7fffffff and the OV flag will be set to 1.
* * **Operations**:\n * ~~~ * if (Rs1.W[0] >= 0) { * res = Rs1.W[0]; * } else { * If (Rs1.W[0] == 0x80000000) { * res = 0x7fffffff; * OV = 1; * } else { * res = -Rs1.W[0]; * } * } * Rd = SE32(res); * ~~~ * * \param [in] a signed long type of value stored in a * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_KABSW(signed long a) { unsigned long result; __ASM volatile("kabsw %0, %1" : "=r"(result) : "r"(a)); return result; } /* ===== Inline Function End for 3.25. KABSW ===== */ /* ===== Inline Function Start for 3.26. KADD8 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB * \brief KADD8 (SIMD 8-bit Signed Saturating Addition) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * KADD8 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 8-bit signed integer element saturating additions simultaneously. * * **Description**:\n * This instruction adds the 8-bit signed integer elements in Rs1 with the 8-bit signed * integer elements in Rs2. If any of the results are beyond the Q7 number range (-2^7 <= Q7 <= 2^7-1), they * are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd. * * **Operations**:\n * ~~~ * res[x] = Rs1.B[x] + Rs2.B[x]; * if (res[x] > 127) { * res[x] = 127; * OV = 1; * } else if (res[x] < -128) { * res[x] = -128; * OV = 1; * } * Rd.B[x] = res[x]; * for RV32: x=3...0, * for RV64: x=7...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_KADD8(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("kadd8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.26. KADD8 ===== */ /* ===== Inline Function Start for 3.27. 
KADD16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief KADD16 (SIMD 16-bit Signed Saturating Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KADD16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element saturating additions simultaneously.
 *
 * **Description**:\n
 * This instruction adds the 16-bit signed integer elements in Rs1 with the 16-bit signed
 * integer elements in Rs2. If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= 2^15-1),
 * they are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rs1.H[x] + Rs2.H[x];
 * if (res[x] > 32767) {
 *   res[x] = 32767;
 *   OV = 1;
 * } else if (res[x] < -32768) {
 *   res[x] = -32768;
 *   OV = 1;
 * }
 * Rd.H[x] = res[x];
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KADD16(unsigned long a, unsigned long b)
{
    unsigned long sums;

    __ASM volatile("kadd16 %0, %1, %2"
                   : "=r"(sums)
                   : "r"(a), "r"(b));
    return sums;
}
/* ===== Inline Function End for 3.27. KADD16 ===== */

/* ===== Inline Function Start for 3.28. KADD64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
 * \brief KADD64 (64-bit Signed Saturating Addition)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * KADD64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Add two 64-bit signed integers. The result is saturated to the Q63 range.
 *
 * **RV32 Description**:\n
 * This instruction adds the 64-bit signed integer of an even/odd pair of registers
 * specified by Rs1(4,1) with the 64-bit signed integer of an even/odd pair of registers specified by
 * Rs2(4,1). If the 64-bit result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the
 * range and the OV bit is set to 1.
The saturated result is written to an even/odd pair of registers * specified by Rd(4,1). * Rx(4,1), i.e., value d, determines the even/odd pair group of two registers. Specifically, the register * pair includes register 2d and 2d+1. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register * of the pair contains the low 32-bit of the result. * * **RV64 Description**:\n * This instruction adds the 64-bit signed integer in Rs1 with the 64-bit signed * integer in Rs2. If the result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the * range and the OV bit is set to 1. The saturated result is written to Rd. * * **Operations**:\n * ~~~ * RV32: * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1); * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1); * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1); * result = R[a_H].R[a_L] + R[b_H].R[b_L]; * if (result > (2^63)-1) { * result = (2^63)-1; OV = 1; * } else if (result < -2^63) { * result = -2^63; OV = 1; * } * R[t_H].R[t_L] = result; * RV64: * result = Rs1 + Rs2; * if (result > (2^63)-1) { * result = (2^63)-1; OV = 1; * } else if (result < -2^63) { * result = -2^63; OV = 1; * } * Rd = result; * ~~~ * * \param [in] a long long type of value stored in a * \param [in] b long long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_KADD64(long long a, long long b) { long long result; __ASM volatile("kadd64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.28. KADD64 ===== */ /* ===== Inline Function Start for 3.29. KADDH ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU * \brief KADDH (Signed Addition with Q15 Saturation) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * KADDH Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Add the signed lower 32-bit content of two registers with Q15 saturation. 
* * **Description**:\n * The signed lower 32-bit content of Rs1 is added with the signed lower 32-bit content of * Rs2. And the result is saturated to the 16-bit signed integer range of [-2^15, 2^15-1] and then sign- * extended and written to Rd. If saturation happens, this instruction sets the OV flag. * * **Operations**:\n * ~~~ * tmp = Rs1.W[0] + Rs2.W[0]; * if (tmp > 32767) { * res = 32767; * OV = 1; * } else if (tmp < -32768) { * res = -32768; * OV = 1 * } else { * res = tmp; * } * Rd = SE(tmp[15:0]); * ~~~ * * \param [in] a int type of value stored in a * \param [in] b int type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KADDH(int a, int b) { long result; __ASM volatile("kaddh %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.29. KADDH ===== */ /* ===== Inline Function Start for 3.30. KADDW ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU * \brief KADDW (Signed Addition with Q31 Saturation) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * KADDW Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Add the lower 32-bit signed content of two registers with Q31 saturation. * * **Description**:\n * The lower 32-bit signed content of Rs1 is added with the lower 32-bit signed content of * Rs2. And the result is saturated to the 32-bit signed integer range of [-2^31, 2^31-1] and then sign- * extended and written to Rd. If saturation happens, this instruction sets the OV flag. 
* * **Operations**:\n * ~~~ * tmp = Rs1.W[0] + Rs2.W[0]; * if (tmp > (2^31)-1) { * res = (2^31)-1; * OV = 1; * } else if (tmp < -2^31) { * res = -2^31; * OV = 1 * } else { * res = tmp; * } * Rd = res[31:0]; // RV32 * Rd = SE(res[31:0]) // RV64 * ~~~ * * \param [in] a int type of value stored in a * \param [in] b int type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KADDW(int a, int b) { long result; __ASM volatile("kaddw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.30. KADDW ===== */ /* ===== Inline Function Start for 3.31. KCRAS16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB * \brief KCRAS16 (SIMD 16-bit Signed Saturating Cross Addition & Subtraction) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * KCRAS16 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 16-bit signed integer element saturating addition and 16-bit signed integer element * saturating subtraction in a 32-bit chunk simultaneously. Operands are from crossed positions in 32- * bit chunks. * * **Description**:\n * This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in * Rs1 with the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs2; at the same time, it * subtracts the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs2 from the 16-bit signed * integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the Q15 number * range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated * results are written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit chunks in Rd for * subtraction. 
* * **Operations**:\n * ~~~ * res1 = Rs1.W[x][31:16] + Rs2.W[x][15:0]; * res2 = Rs1.W[x][15:0] - Rs2.W[x][31:16]; * for (res in [res1, res2]) { * if (res > (2^15)-1) { * res = (2^15)-1; * OV = 1; * } else if (res < -2^15) { * res = -2^15; * OV = 1; * } * } * Rd.W[x][31:16] = res1; * Rd.W[x][15:0] = res2; * for RV32, x=0 * for RV64, x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_KCRAS16(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("kcras16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.31. KCRAS16 ===== */ /* ===== Inline Function Start for 3.32. KCRSA16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB * \brief KCRSA16 (SIMD 16-bit Signed Saturating Cross Subtraction & Addition) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * KCRSA16 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 16-bit signed integer element saturating subtraction and 16-bit signed integer element * saturating addition in a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit * chunks. * * **Description**:\n * This instruction subtracts the 16-bit signed integer element in [15:0] of 32-bit chunks * in Rs2 from the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1; at the same time, it * adds the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs2 with the 16-bit signed * integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the Q15 number * range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated * results are written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of 32-bit chunks in Rd * for addition. 
* * **Operations**:\n * ~~~ * res1 = Rs1.W[x][31:16] - Rs2.W[x][15:0]; * res2 = Rs1.W[x][15:0] + Rs2.W[x][31:16]; * for (res in [res1, res2]) { * if (res > (2^15)-1) { * res = (2^15)-1; * OV = 1; * } else if (res < -2^15) { * res = -2^15; * OV = 1; * } * } * Rd.W[x][31:16] = res1; * Rd.W[x][15:0] = res2; * for RV32, x=0 * for RV64, x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_KCRSA16(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("kcrsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.32. KCRSA16 ===== */ /* ===== Inline Function Start for 3.33.1. KDMBB ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU * \brief KDMBB (Signed Saturating Double Multiply B16 x B16) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * KDMxy Rd, Rs1, Rs2 (xy = BB, BT, TT) * ~~~ * * **Purpose**:\n * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion * of the lower 32-bit chunk in registers and then double and saturate the Q31 result. The result is * written into the destination register for RV32 or sign-extended to 64-bits and written into the * destination register for RV64. If saturation happens, an overflow flag OV will be set. * * **Description**:\n * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then * doubled and saturated into a Q31 value. The Q31 value is then written into Rd (sign-extended in * RV64). When both the two Q15 inputs are 0x8000, saturation will happen. The result will be * saturated to 0x7FFFFFFF and the overflow flag OV will be set. 
* * **Operations**:\n * ~~~ * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMBB * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMBT * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMTT * If (0x8000 != aop | 0x8000 != bop) { * Mresult = aop * bop; * resQ31 = Mresult << 1; * Rd = resQ31; // RV32 * Rd = SE(resQ31); // RV64 * } else { * resQ31 = 0x7FFFFFFF; * Rd = resQ31; // RV32 * Rd = SE(resQ31); // RV64 * OV = 1; * } * ~~~ * * \param [in] a unsigned int type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KDMBB(unsigned int a, unsigned int b) { long result; __ASM volatile("kdmbb %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.33.1. KDMBB ===== */ /* ===== Inline Function Start for 3.33.2. KDMBT ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU * \brief KDMBT (Signed Saturating Double Multiply B16 x T16) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * KDMxy Rd, Rs1, Rs2 (xy = BB, BT, TT) * ~~~ * * **Purpose**:\n * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion * of the lower 32-bit chunk in registers and then double and saturate the Q31 result. The result is * written into the destination register for RV32 or sign-extended to 64-bits and written into the * destination register for RV64. If saturation happens, an overflow flag OV will be set. * * **Description**:\n * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then * doubled and saturated into a Q31 value. The Q31 value is then written into Rd (sign-extended in * RV64). When both the two Q15 inputs are 0x8000, saturation will happen. The result will be * saturated to 0x7FFFFFFF and the overflow flag OV will be set. 
* * **Operations**:\n * ~~~ * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMBB * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMBT * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMTT * If (0x8000 != aop | 0x8000 != bop) { * Mresult = aop * bop; * resQ31 = Mresult << 1; * Rd = resQ31; // RV32 * Rd = SE(resQ31); // RV64 * } else { * resQ31 = 0x7FFFFFFF; * Rd = resQ31; // RV32 * Rd = SE(resQ31); // RV64 * OV = 1; * } * ~~~ * * \param [in] a unsigned int type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KDMBT(unsigned int a, unsigned int b) { long result; __ASM volatile("kdmbt %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.33.2. KDMBT ===== */ /* ===== Inline Function Start for 3.33.3. KDMTT ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU * \brief KDMTT (Signed Saturating Double Multiply T16 x T16) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * KDMxy Rd, Rs1, Rs2 (xy = BB, BT, TT) * ~~~ * * **Purpose**:\n * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion * of the lower 32-bit chunk in registers and then double and saturate the Q31 result. The result is * written into the destination register for RV32 or sign-extended to 64-bits and written into the * destination register for RV64. If saturation happens, an overflow flag OV will be set. * * **Description**:\n * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then * doubled and saturated into a Q31 value. The Q31 value is then written into Rd (sign-extended in * RV64). When both the two Q15 inputs are 0x8000, saturation will happen. The result will be * saturated to 0x7FFFFFFF and the overflow flag OV will be set. 
* * **Operations**:\n * ~~~ * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMBB * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMBT * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMTT * If (0x8000 != aop | 0x8000 != bop) { * Mresult = aop * bop; * resQ31 = Mresult << 1; * Rd = resQ31; // RV32 * Rd = SE(resQ31); // RV64 * } else { * resQ31 = 0x7FFFFFFF; * Rd = resQ31; // RV32 * Rd = SE(resQ31); // RV64 * OV = 1; * } * ~~~ * * \param [in] a unsigned int type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KDMTT(unsigned int a, unsigned int b) { long result; __ASM volatile("kdmtt %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.33.3. KDMTT ===== */ /* ===== Inline Function Start for 3.34.1. KDMABB ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU * \brief KDMABB (Signed Saturating Double Multiply Addition B16 x B16) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * KDMAxy Rd, Rs1, Rs2 (xy = BB, BT, TT) * ~~~ * * **Purpose**:\n * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion * of the lower 32-bit chunk in registers and then double and saturate the Q31 result, add the result * with the sign-extended lower 32-bit chunk destination register and write the saturated addition * result into the destination register. If saturation happens, an overflow flag OV will be set. * * **Description**:\n * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then * doubled and saturated into a Q31 value. The Q31 value is then added with the content of Rd. If the * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and * the OV flag is set to 1. The result after saturation is written to Rd. 
* When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be * set. * * **Operations**:\n * ~~~ * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMABB * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMABT * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMATT * If (0x8000 != aop | 0x8000 != bop) { * Mresult = aop * bop; * resQ31 = Mresult << 1; * } else { * resQ31 = 0x7FFFFFFF; * OV = 1; * } * resadd = Rd + resQ31; // RV32 * resadd = Rd.W[0] + resQ31; // RV64 * if (resadd > (2^31)-1) { * resadd = (2^31)-1; * OV = 1; * } else if (resadd < -2^31) { * resadd = -2^31; * OV = 1; * } * Rd = resadd; // RV32 * Rd = SE(resadd); // RV64 * ~~~ * * \param [in] t long type of value stored in t * \param [in] a unsigned int type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KDMABB(long t, unsigned int a, unsigned int b) { __ASM volatile("kdmabb %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 3.34.1. KDMABB ===== */ /* ===== Inline Function Start for 3.34.2. KDMABT ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU * \brief KDMABT (Signed Saturating Double Multiply Addition B16 x T16) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * KDMAxy Rd, Rs1, Rs2 (xy = BB, BT, TT) * ~~~ * * **Purpose**:\n * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion * of the lower 32-bit chunk in registers and then double and saturate the Q31 result, add the result * with the sign-extended lower 32-bit chunk destination register and write the saturated addition * result into the destination register. If saturation happens, an overflow flag OV will be set. * * **Description**:\n * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. 
The Q30 result is then * doubled and saturated into a Q31 value. The Q31 value is then added with the content of Rd. If the * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and * the OV flag is set to 1. The result after saturation is written to Rd. * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be * set. * * **Operations**:\n * ~~~ * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMABB * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMABT * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMATT * If (0x8000 != aop | 0x8000 != bop) { * Mresult = aop * bop; * resQ31 = Mresult << 1; * } else { * resQ31 = 0x7FFFFFFF; * OV = 1; * } * resadd = Rd + resQ31; // RV32 * resadd = Rd.W[0] + resQ31; // RV64 * if (resadd > (2^31)-1) { * resadd = (2^31)-1; * OV = 1; * } else if (resadd < -2^31) { * resadd = -2^31; * OV = 1; * } * Rd = resadd; // RV32 * Rd = SE(resadd); // RV64 * ~~~ * * \param [in] t long type of value stored in t * \param [in] a unsigned int type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KDMABT(long t, unsigned int a, unsigned int b) { __ASM volatile("kdmabt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 3.34.2. KDMABT ===== */ /* ===== Inline Function Start for 3.34.3. KDMATT ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU * \brief KDMATT (Signed Saturating Double Multiply Addition T16 x T16) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * KDMAxy Rd, Rs1, Rs2 (xy = BB, BT, TT) * ~~~ * * **Purpose**:\n * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion * of the lower 32-bit chunk in registers and then double and saturate the Q31 result, add the result * with the sign-extended lower 32-bit chunk destination register and write the saturated addition * result into the destination register. 
If saturation happens, an overflow flag OV will be set. * * **Description**:\n * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then * doubled and saturated into a Q31 value. The Q31 value is then added with the content of Rd. If the * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and * the OV flag is set to 1. The result after saturation is written to Rd. * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be * set. * * **Operations**:\n * ~~~ * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMABB * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMABT * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMATT * If (0x8000 != aop | 0x8000 != bop) { * Mresult = aop * bop; * resQ31 = Mresult << 1; * } else { * resQ31 = 0x7FFFFFFF; * OV = 1; * } * resadd = Rd + resQ31; // RV32 * resadd = Rd.W[0] + resQ31; // RV64 * if (resadd > (2^31)-1) { * resadd = (2^31)-1; * OV = 1; * } else if (resadd < -2^31) { * resadd = -2^31; * OV = 1; * } * Rd = resadd; // RV32 * Rd = SE(resadd); // RV64 * ~~~ * * \param [in] t long type of value stored in t * \param [in] a unsigned int type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KDMATT(long t, unsigned int a, unsigned int b) { __ASM volatile("kdmatt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 3.34.3. KDMATT ===== */ /* ===== Inline Function Start for 3.35.1. KHM8 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY * \brief KHM8 (SIMD Signed Saturating Q7 Multiply) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * KHM8 Rd, Rs1, Rs2 * KHMX8 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do Q7xQ7 element multiplications simultaneously. The Q14 results are then reduced to Q7 * numbers again. 
* * **Description**:\n * For the `KHM8` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1 * with the top 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7 * content of 16-bit chunks in Rs1 with the bottom 8-bit Q7 content of 16-bit chunks in Rs2. * For the `KHMX16` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1 with the * bottom 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7 * content of 16-bit chunks in Rs1 with the top 8-bit Q7 content of 16-bit chunks in Rs2. * The Q14 results are then right-shifted 7-bits and saturated into Q7 values. The Q7 results are then * written into Rd. When both the two Q7 inputs of a multiplication are 0x80, saturation will happen. * The result will be saturated to 0x7F and the overflow flag OV will be set. * * **Operations**:\n * ~~~ * if (is `KHM8`) { * op1t = Rs1.B[x+1]; op2t = Rs2.B[x+1]; // top * op1b = Rs1.B[x]; op2b = Rs2.B[x]; // bottom * } else if (is `KHMX8`) { * op1t = Rs1.H[x+1]; op2t = Rs2.H[x]; // Rs1 top * op1b = Rs1.H[x]; op2b = Rs2.H[x+1]; // Rs1 bottom * } * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) { * if (0x80 != aop | 0x80 != bop) { * res = (aop s* bop) >> 7; * } else { * res= 0x7F; * OV = 1; * } * } * Rd.H[x/2] = concat(rest, resb); * for RV32, x=0,2 * for RV64, x=0,2,4,6 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_KHM8(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("khm8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.35.1. KHM8 ===== */ /* ===== Inline Function Start for 3.35.2. 
KHMX8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY
 * \brief KHMX8 (SIMD Signed Saturating Crossed Q7 Multiply)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KHM8 Rd, Rs1, Rs2
 * KHMX8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do Q7xQ7 element multiplications simultaneously. The Q14 results are then reduced to Q7
 * numbers again.
 *
 * **Description**:\n
 * For the `KHM8` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1
 * with the top 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7
 * content of 16-bit chunks in Rs1 with the bottom 8-bit Q7 content of 16-bit chunks in Rs2.
 * For the `KHMX8` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1 with the
 * bottom 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7
 * content of 16-bit chunks in Rs1 with the top 8-bit Q7 content of 16-bit chunks in Rs2.
 * The Q14 results are then right-shifted 7-bits and saturated into Q7 values. The Q7 results are then
 * written into Rd. When both the two Q7 inputs of a multiplication are 0x80, saturation will happen.
 * The result will be saturated to 0x7F and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * if (is `KHM8`) {
 *   op1t = Rs1.B[x+1]; op2t = Rs2.B[x+1]; // top
 *   op1b = Rs1.B[x]; op2b = Rs2.B[x]; // bottom
 * } else if (is `KHMX8`) {
 *   op1t = Rs1.B[x+1]; op2t = Rs2.B[x]; // Rs1 top
 *   op1b = Rs1.B[x]; op2b = Rs2.B[x+1]; // Rs1 bottom
 * }
 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
 *   if (0x80 != aop | 0x80 != bop) {
 *     res = (aop s* bop) >> 7;
 *   } else {
 *     res= 0x7F;
 *     OV = 1;
 *   }
 * }
 * Rd.H[x/2] = concat(rest, resb);
 * for RV32, x=0,2
 * for RV64, x=0,2,4,6
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a (Rs1 operand)
 * \param [in]  b    unsigned long type of value stored in b (Rs2 operand)
 * \return value stored in unsigned long type (Rd result)
 */
__STATIC_FORCEINLINE unsigned long __RV_KHMX8(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* Operands: %0 = rd (Rd, write-only), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile(
        "khmx8 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.35.2. KHMX8 ===== */

/* ===== Inline Function Start for 3.36.1. KHM16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY
 * \brief KHM16 (SIMD Signed Saturating Q15 Multiply)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KHM16 Rd, Rs1, Rs2
 * KHMX16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do Q15xQ15 element multiplications simultaneously. The Q30 results are then reduced to
 * Q15 numbers again.
 *
 * **Description**:\n
 * For the `KHM16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in
 * Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom
 * 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content of 32-bit chunks in
 * Rs2.
 * For the `KHMX16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the
 * bottom 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom 16-bit Q15
 * content of 32-bit chunks in Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2.
* The Q30 results are then right-shifted 15-bits and saturated into Q15 values. The Q15 results are * then written into Rd. When both the two Q15 inputs of a multiplication are 0x8000, saturation will * happen. The result will be saturated to 0x7FFF and the overflow flag OV will be set. * * **Operations**:\n * ~~~ * if (is `KHM16`) { * op1t = Rs1.H[x+1]; op2t = Rs2.H[x+1]; // top * op1b = Rs1.H[x]; op2b = Rs2.H[x]; // bottom * } else if (is `KHMX16`) { * op1t = Rs1.H[x+1]; op2t = Rs2.H[x]; // Rs1 top * op1b = Rs1.H[x]; op2b = Rs2.H[x+1]; // Rs1 bottom * } * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) { * if (0x8000 != aop | 0x8000 != bop) { * res = (aop s* bop) >> 15; * } else { * res= 0x7FFF; * OV = 1; * } * } * Rd.W[x/2] = concat(rest, resb); * for RV32: x=0 * for RV64: x=0,2 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_KHM16(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("khm16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.36.1. KHM16 ===== */ /* ===== Inline Function Start for 3.36.2. KHMX16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY * \brief KHMX16 (SIMD Signed Saturating Crossed Q15 Multiply) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * KHM16 Rd, Rs1, Rs2 * KHMX16 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do Q15xQ15 element multiplications simultaneously. The Q30 results are then reduced to * Q15 numbers again. * * **Description**:\n * For the `KHM16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in * Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom * 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content of 32-bit chunks in * Rs2. 
* For the `KHMX16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the * bottom 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom 16-bit Q15 * content of 32-bit chunks in Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2. * The Q30 results are then right-shifted 15-bits and saturated into Q15 values. The Q15 results are * then written into Rd. When both the two Q15 inputs of a multiplication are 0x8000, saturation will * happen. The result will be saturated to 0x7FFF and the overflow flag OV will be set. * * **Operations**:\n * ~~~ * if (is `KHM16`) { * op1t = Rs1.H[x+1]; op2t = Rs2.H[x+1]; // top * op1b = Rs1.H[x]; op2b = Rs2.H[x]; // bottom * } else if (is `KHMX16`) { * op1t = Rs1.H[x+1]; op2t = Rs2.H[x]; // Rs1 top * op1b = Rs1.H[x]; op2b = Rs2.H[x+1]; // Rs1 bottom * } * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) { * if (0x8000 != aop | 0x8000 != bop) { * res = (aop s* bop) >> 15; * } else { * res= 0x7FFF; * OV = 1; * } * } * Rd.W[x/2] = concat(rest, resb); * for RV32: x=0 * for RV64: x=0,2 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_KHMX16(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("khmx16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.36.2. KHMX16 ===== */ /* ===== Inline Function Start for 3.37.1. 
KHMBB ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
 * \brief KHMBB (Signed Saturating Half Multiply B16 x B16)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * KHMxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed Q15 number contents of two 16-bit data in the corresponding portion
 * of the lower 32-bit chunk in registers and then right-shift 15 bits to turn the Q30 result into a Q15
 * number again and saturate the Q15 result into the destination register. If saturation happens, an
 * overflow flag OV will be set.
 *
 * **Description**:\n
 * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
 * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then
 * right-shifted 15-bits and saturated into a Q15 value. The Q15 value is then sign-extended and written
 * into Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated
 * to 0x7FFF and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * aop = Rs1.H[0]; bop = Rs2.H[0]; // KHMBB
 * aop = Rs1.H[0]; bop = Rs2.H[1]; // KHMBT
 * aop = Rs1.H[1]; bop = Rs2.H[1]; // KHMTT
 * If (0x8000 != aop | 0x8000 != bop) {
 *   Mresult[31:0] = aop * bop;
 *   res[15:0] = Mresult[30:15];
 * } else {
 *   res[15:0] = 0x7FFF;
 *   OV = 1;
 * }
 * Rd = SE32(res[15:0]); // RV32
 * Rd = SE64(res[15:0]); // RV64
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a (Rs1 operand)
 * \param [in]  b    unsigned int type of value stored in b (Rs2 operand)
 * \return value stored in long type (Rd result)
 */
__STATIC_FORCEINLINE long __RV_KHMBB(unsigned int a, unsigned int b)
{
    long rd;

    /* Operands: %0 = rd (Rd, write-only), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile(
        "khmbb %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.37.1. KHMBB ===== */

/* ===== Inline Function Start for 3.37.2.
KHMBT ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
 * \brief KHMBT (Signed Saturating Half Multiply B16 x T16)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * KHMxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed Q15 number contents of two 16-bit data in the corresponding portion
 * of the lower 32-bit chunk in registers and then right-shift 15 bits to turn the Q30 result into a Q15
 * number again and saturate the Q15 result into the destination register. If saturation happens, an
 * overflow flag OV will be set.
 *
 * **Description**:\n
 * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
 * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then
 * right-shifted 15-bits and saturated into a Q15 value. The Q15 value is then sign-extended and written
 * into Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated
 * to 0x7FFF and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * aop = Rs1.H[0]; bop = Rs2.H[0]; // KHMBB
 * aop = Rs1.H[0]; bop = Rs2.H[1]; // KHMBT
 * aop = Rs1.H[1]; bop = Rs2.H[1]; // KHMTT
 * If (0x8000 != aop | 0x8000 != bop) {
 *   Mresult[31:0] = aop * bop;
 *   res[15:0] = Mresult[30:15];
 * } else {
 *   res[15:0] = 0x7FFF;
 *   OV = 1;
 * }
 * Rd = SE32(res[15:0]); // RV32
 * Rd = SE64(res[15:0]); // RV64
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a (Rs1 operand)
 * \param [in]  b    unsigned int type of value stored in b (Rs2 operand)
 * \return value stored in long type (Rd result)
 */
__STATIC_FORCEINLINE long __RV_KHMBT(unsigned int a, unsigned int b)
{
    long rd;

    /* Operands: %0 = rd (Rd, write-only), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile(
        "khmbt %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.37.2. KHMBT ===== */

/* ===== Inline Function Start for 3.37.3.
KHMTT ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
 * \brief KHMTT (Signed Saturating Half Multiply T16 x T16)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * KHMxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed Q15 number contents of two 16-bit data in the corresponding portion
 * of the lower 32-bit chunk in registers and then right-shift 15 bits to turn the Q30 result into a Q15
 * number again and saturate the Q15 result into the destination register. If saturation happens, an
 * overflow flag OV will be set.
 *
 * **Description**:\n
 * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
 * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then
 * right-shifted 15-bits and saturated into a Q15 value. The Q15 value is then sign-extended and written
 * into Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated
 * to 0x7FFF and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * aop = Rs1.H[0]; bop = Rs2.H[0]; // KHMBB
 * aop = Rs1.H[0]; bop = Rs2.H[1]; // KHMBT
 * aop = Rs1.H[1]; bop = Rs2.H[1]; // KHMTT
 * If (0x8000 != aop | 0x8000 != bop) {
 *   Mresult[31:0] = aop * bop;
 *   res[15:0] = Mresult[30:15];
 * } else {
 *   res[15:0] = 0x7FFF;
 *   OV = 1;
 * }
 * Rd = SE32(res[15:0]); // RV32
 * Rd = SE64(res[15:0]); // RV64
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a (Rs1 operand)
 * \param [in]  b    unsigned int type of value stored in b (Rs2 operand)
 * \return value stored in long type (Rd result)
 */
__STATIC_FORCEINLINE long __RV_KHMTT(unsigned int a, unsigned int b)
{
    long rd;

    /* Operands: %0 = rd (Rd, write-only), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile(
        "khmtt %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.37.3. KHMTT ===== */

/* ===== Inline Function Start for 3.38.1.
KMABB ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
 * \brief KMABB (SIMD Saturating Signed Multiply Bottom Halfs & Add)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMABB Rd, Rs1, Rs2
 * KMABT Rd, Rs1, Rs2
 * KMATT Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 16-bit content of 32-bit elements in a register with the 16-bit content
 * of 32-bit elements in another register and add the result to the content of 32-bit elements in the
 * third register. The addition result may be saturated and is written to the third register.
 * * KMABB: rd.W[x] + bottom*bottom (per 32-bit element)
 * * KMABT: rd.W[x] + bottom*top (per 32-bit element)
 * * KMATT: rd.W[x] + top*top (per 32-bit element)
 *
 * **Description**:\n
 * For the `KMABB` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
 * the bottom 16-bit content of 32-bit elements in Rs2.
 * For the `KMABT` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
 * the top 16-bit content of 32-bit elements in Rs2.
 * For the `KMATT` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
 * top 16-bit content of 32-bit elements in Rs2.
 * The multiplication result is added to the content of 32-bit elements in Rd. If the addition result is
 * beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to
 * 1. The results after saturation are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as
 * signed integers.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]); // KMABB
 * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[1]); // KMABT
 * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]); // KMATT
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t (accumulator, passed in and returned through Rd)
 * \param [in]  a    unsigned long type of value stored in a (Rs1 operand)
 * \param [in]  b    unsigned long type of value stored in b (Rs2 operand)
 * \return value stored in long type (Rd after the instruction)
 */
__STATIC_FORCEINLINE long __RV_KMABB(long t, unsigned long a, unsigned long b)
{
    /* Operands: %0 = t (Rd, read and written), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile(
        "kmabb %0, %1, %2"
        : "+r"(t)
        : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for 3.38.1. KMABB ===== */

/* ===== Inline Function Start for 3.38.2. KMABT ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
 * \brief KMABT (SIMD Saturating Signed Multiply Bottom & Top Halfs & Add)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMABB Rd, Rs1, Rs2
 * KMABT Rd, Rs1, Rs2
 * KMATT Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 16-bit content of 32-bit elements in a register with the 16-bit content
 * of 32-bit elements in another register and add the result to the content of 32-bit elements in the
 * third register. The addition result may be saturated and is written to the third register.
 * * KMABB: rd.W[x] + bottom*bottom (per 32-bit element)
 * * KMABT: rd.W[x] + bottom*top (per 32-bit element)
 * * KMATT: rd.W[x] + top*top (per 32-bit element)
 *
 * **Description**:\n
 * For the `KMABB` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
 * the bottom 16-bit content of 32-bit elements in Rs2.
 * For the `KMABT` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
 * the top 16-bit content of 32-bit elements in Rs2.
* For the `KMATT` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the * top 16-bit content of 32-bit elements in Rs2. * The multiplication result is added to the content of 32-bit elements in Rd. If the addition result is * beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to * 1. The results after saturation are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as * signed integers. * * **Operations**:\n * ~~~ * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]); // KMABB * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[1]); // KMABT * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]); // KMATT * if (res[x] > (2^31)-1) { * res[x] = (2^31)-1; * OV = 1; * } else if (res[x] < -2^31) { * res[x] = -2^31; * OV = 1; * } * Rd.W[x] = res[x]; * for RV32: x=0 * for RV64: x=1...0 * ~~~ * * \param [in] t long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KMABT(long t, unsigned long a, unsigned long b) { __ASM volatile("kmabt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 3.38.2. KMABT ===== */ /* ===== Inline Function Start for 3.38.3. KMATT ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB * \brief KMATT (SIMD Saturating Signed Multiply Top Halfs & Add) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * KMABB Rd, Rs1, Rs2 * KMABT Rd, Rs1, Rs2 * KMATT Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the signed 16-bit content of 32-bit elements in a register with the 16-bit content * of 32-bit elements in another register and add the result to the content of 32-bit elements in the * third register. The addition result may be saturated and is written to the third register. 
* * KMABB: rd.W[x] + bottom*bottom (per 32-bit element) * * KMABT rd.W[x] + bottom*top (per 32-bit element) * * KMATT rd.W[x] + top*top (per 32-bit element) * * **Description**:\n * For the `KMABB` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with * the bottom 16-bit content of 32-bit elements in Rs2. * For the `KMABT` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with * the top 16-bit content of 32-bit elements in Rs2. * For the `KMATT` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the * top 16-bit content of 32-bit elements in Rs2. * The multiplication result is added to the content of 32-bit elements in Rd. If the addition result is * beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to * 1. The results after saturation are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as * signed integers. * * **Operations**:\n * ~~~ * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]); // KMABB * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[1]); // KMABT * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]); // KMATT * if (res[x] > (2^31)-1) { * res[x] = (2^31)-1; * OV = 1; * } else if (res[x] < -2^31) { * res[x] = -2^31; * OV = 1; * } * Rd.W[x] = res[x]; * for RV32: x=0 * for RV64: x=1...0 * ~~~ * * \param [in] t long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KMATT(long t, unsigned long a, unsigned long b) { __ASM volatile("kmatt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 3.38.3. KMATT ===== */ /* ===== Inline Function Start for 3.39.1. 
KMADA ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
 * \brief KMADA (SIMD Saturating Signed Multiply Two Halfs and Two Adds)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMADA Rd, Rs1, Rs2
 * KMAXDA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then adds
 * the two 32-bit results and 32-bit elements in a third register together. The addition result may be
 * saturated.
 * * KMADA: rd.W[x] + top*top + bottom*bottom (per 32-bit element)
 * * KMAXDA: rd.W[x] + top*bottom + bottom*top (per 32-bit element)
 *
 * **Description**:\n
 * For the `KMADA` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
 * the bottom 16-bit content of 32-bit elements in Rs2 and then adds the result to the result of
 * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit
 * elements in Rs2.
 * For the `KMAXDA` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
 * bottom 16-bit content of 32-bit elements in Rs2 and then adds the result to the result of multiplying
 * the bottom 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit elements in
 * Rs2.
 * The result is added to the content of 32-bit elements in Rd. If the addition result is beyond the Q31
 * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The 32-bit
 * results after saturation are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed
 * integers.
 *
 * **Operations**:\n
 * ~~~
 * // KMADA
 * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]) + (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
 * // KMAXDA
 * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[0]) + (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t (accumulator, passed in and returned through Rd)
 * \param [in]  a    unsigned long type of value stored in a (Rs1 operand)
 * \param [in]  b    unsigned long type of value stored in b (Rs2 operand)
 * \return value stored in long type (Rd after the instruction)
 */
__STATIC_FORCEINLINE long __RV_KMADA(long t, unsigned long a, unsigned long b)
{
    /* Operands: %0 = t (Rd, read and written), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile(
        "kmada %0, %1, %2"
        : "+r"(t)
        : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for 3.39.1. KMADA ===== */

/* ===== Inline Function Start for 3.39.2. KMAXDA ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
 * \brief KMAXDA (SIMD Saturating Signed Crossed Multiply Two Halfs and Two Adds)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMADA Rd, Rs1, Rs2
 * KMAXDA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then adds
 * the two 32-bit results and 32-bit elements in a third register together. The addition result may be
 * saturated.
 * * KMADA: rd.W[x] + top*top + bottom*bottom (per 32-bit element)
 * * KMAXDA: rd.W[x] + top*bottom + bottom*top (per 32-bit element)
 *
 * **Description**:\n
 * For the `KMADA` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
 * the bottom 16-bit content of 32-bit elements in Rs2 and then adds the result to the result of
 * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit
 * elements in Rs2.
* For the `KMAXDA` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the * bottom 16-bit content of 32-bit elements in Rs2 and then adds the result to the result of multiplying * the bottom 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit elements in * Rs2. * The result is added to the content of 32-bit elements in Rd. If the addition result is beyond the Q31 * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The 32-bit * results after saturation are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed * integers. * * **Operations**:\n * ~~~ * // KMADA * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]) + (Rs1.W[x].H[0] * Rs2.W[x].H[0]); * // KMAXDA * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[0]) + (Rs1.W[x].H[0] * Rs2.W[x].H[1]); * if (res[x] > (2^31)-1) { * res[x] = (2^31)-1; * OV = 1; * } else if (res[x] < -2^31) { * res[x] = -2^31; * OV = 1; * } * Rd.W[x] = res[x]; * for RV32: x=0 * for RV64: x=1...0 * ~~~ * * \param [in] t long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KMAXDA(long t, unsigned long a, unsigned long b) { __ASM volatile("kmaxda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 3.39.2. KMAXDA ===== */ /* ===== Inline Function Start for 3.40.1. KMADS ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB * \brief KMADS (SIMD Saturating Signed Multiply Two Halfs & Subtract & Add) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * KMADS Rd, Rs1, Rs2 * KMADRS Rd, Rs1, Rs2 * KMAXDS Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then * perform a subtraction operation between the two 32-bit results. 
Then add the subtraction result to * the corresponding 32-bit elements in a third register. The addition result may be saturated. * * KMADS: rd.W[x] + (top*top - bottom*bottom) (per 32-bit element) * * KMADRS: rd.W[x] + (bottom*bottom - top*top) (per 32-bit element) * * KMAXDS: rd.W[x] + (top*bottom - bottom*top) (per 32-bit element) * * **Description**:\n * For the `KMADS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with * the bottom 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit * elements in Rs2. * For the `KMADRS` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the * top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of * multiplying the bottom 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32- * bit elements in Rs2. * For the `KMAXDS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with * the top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of * multiplying the top 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-bit * elements in Rs2. * The subtraction result is then added to the content of the corresponding 32-bit elements in Rd. If the * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and * the OV bit is set to 1. The 32-bit results after saturation are written to Rd. The 16-bit contents of Rs1 * and Rs2 are treated as signed integers. 
* * **Operations**:\n * ~~~ * // KMADS * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]); * // KMADRS * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]); * // KMAXDS * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]); * if (res[x] > (2^31)-1) { * res[x] = (2^31)-1; * OV = 1; * } else if (res[x] < -2^31) { * res[x] = -2^31; * OV = 1; * } * Rd.W[x] = res[x]; * for RV32: x=0 * for RV64: x=1...0 * ~~~ * * \param [in] t long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KMADS(long t, unsigned long a, unsigned long b) { __ASM volatile("kmads %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 3.40.1. KMADS ===== */ /* ===== Inline Function Start for 3.40.2. KMADRS ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB * \brief KMADRS (SIMD Saturating Signed Multiply Two Halfs & Reverse Subtract & Add) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * KMADS Rd, Rs1, Rs2 * KMADRS Rd, Rs1, Rs2 * KMAXDS Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to * the corresponding 32-bit elements in a third register. The addition result may be saturated. 
* * KMADS: rd.W[x] + (top*top - bottom*bottom) (per 32-bit element) * * KMADRS: rd.W[x] + (bottom*bottom - top*top) (per 32-bit element) * * KMAXDS: rd.W[x] + (top*bottom - bottom*top) (per 32-bit element) * * **Description**:\n * For the `KMADS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with * the bottom 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit * elements in Rs2. * For the `KMADRS` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the * top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of * multiplying the bottom 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32- * bit elements in Rs2. * For the `KMAXDS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with * the top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of * multiplying the top 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-bit * elements in Rs2. * The subtraction result is then added to the content of the corresponding 32-bit elements in Rd. If the * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and * the OV bit is set to 1. The 32-bit results after saturation are written to Rd. The 16-bit contents of Rs1 * and Rs2 are treated as signed integers. 
* * **Operations**:\n * ~~~ * // KMADS * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]); * // KMADRS * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]); * // KMAXDS * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]); * if (res[x] > (2^31)-1) { * res[x] = (2^31)-1; * OV = 1; * } else if (res[x] < -2^31) { * res[x] = -2^31; * OV = 1; * } * Rd.W[x] = res[x]; * for RV32: x=0 * for RV64: x=1...0 * ~~~ * * \param [in] t long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KMADRS(long t, unsigned long a, unsigned long b) { __ASM volatile("kmadrs %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 3.40.2. KMADRS ===== */ /* ===== Inline Function Start for 3.40.3. KMAXDS ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB * \brief KMAXDS (SIMD Saturating Signed Crossed Multiply Two Halfs & Subtract & Add) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * KMADS Rd, Rs1, Rs2 * KMADRS Rd, Rs1, Rs2 * KMAXDS Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to * the corresponding 32-bit elements in a third register. The addition result may be saturated. 
* * KMADS: rd.W[x] + (top*top - bottom*bottom) (per 32-bit element) * * KMADRS: rd.W[x] + (bottom*bottom - top*top) (per 32-bit element) * * KMAXDS: rd.W[x] + (top*bottom - bottom*top) (per 32-bit element) * * **Description**:\n * For the `KMADS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with * the bottom 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit * elements in Rs2. * For the `KMADRS` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the * top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of * multiplying the bottom 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32- * bit elements in Rs2. * For the `KMAXDS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with * the top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of * multiplying the top 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-bit * elements in Rs2. * The subtraction result is then added to the content of the corresponding 32-bit elements in Rd. If the * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and * the OV bit is set to 1. The 32-bit results after saturation are written to Rd. The 16-bit contents of Rs1 * and Rs2 are treated as signed integers. 
* * **Operations**:\n * ~~~ * // KMADS * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]); * // KMADRS * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]); * // KMAXDS * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]); * if (res[x] > (2^31)-1) { * res[x] = (2^31)-1; * OV = 1; * } else if (res[x] < -2^31) { * res[x] = -2^31; * OV = 1; * } * Rd.W[x] = res[x]; * for RV32: x=0 * for RV64: x=1...0 * ~~~ * * \param [in] t long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KMAXDS(long t, unsigned long a, unsigned long b) { __ASM volatile("kmaxds %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 3.40.3. KMAXDS ===== */ /* ===== Inline Function Start for 3.41. KMAR64 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB * \brief KMAR64 (Signed Multiply and Saturating Add to 64-Bit Data) * \details * **Type**: DSP (64-bit Profile) * * **Syntax**:\n * ~~~ * KMAR64 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the 32-bit signed elements in two registers and add the 64-bit multiplication * results to the 64-bit signed data of a pair of registers (RV32) or a register (RV64). The result is * saturated to the Q63 range and written back to the pair of registers (RV32) or the register (RV64). * * **RV32 Description**:\n * This instruction multiplies the 32-bit signed data of Rs1 with that of Rs2. It adds * the 64-bit multiplication result to the 64-bit signed data of an even/odd pair of registers specified by * Rd(4,1) with unlimited precision. If the 64-bit addition result is beyond the Q63 number range (-2^63 <= * Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. 
The saturated result is written back * to the even/odd pair of registers specified by Rd(4,1). * Rx(4,1), i.e., value d, determines the even/odd pair group of two registers. Specifically, the register * pair includes register 2d and 2d+1. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register * of the pair contains the low 32-bit of the result. * * **RV64 Description**:\n * This instruction multiplies the 32-bit signed elements of Rs1 with that of Rs2. It * adds the 64-bit multiplication results to the 64-bit signed data of Rd with unlimited precision. If the * 64-bit addition result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range * and the OV bit is set to 1. The saturated result is written back to Rd. * * **Operations**:\n * ~~~ * RV32: * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1); * result = R[t_H].R[t_L] + (Rs1 * Rs2); * if (result > (2^63)-1) { * result = (2^63)-1; OV = 1; * } else if (result < -2^63) { * result = -2^63; OV = 1; * } * R[t_H].R[t_L] = result; * RV64: * // `result` has unlimited precision * result = Rd + (Rs1.W[0] * Rs2.W[0]) + (Rs1.W[1] * Rs2.W[1]); * if (result > (2^63)-1) { * result = (2^63)-1; OV = 1; * } else if (result < -2^63) { * result = -2^63; OV = 1; * } * Rd = result; * ~~~ * * \param [in] t long long type of value stored in t * \param [in] a long type of value stored in a * \param [in] b long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_KMAR64(long long t, long a, long b) { __ASM volatile("kmar64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 3.41. KMAR64 ===== */ /* ===== Inline Function Start for 3.42.1. 
KMDA ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
 * \brief KMDA (SIMD Signed Multiply Two Halfs and Add)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMDA Rd, Rs1, Rs2
 * KMXDA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
 * add the two 32-bit results together. The addition result may be saturated.
 * * KMDA: top*top + bottom*bottom (per 32-bit element)
 * * KMXDA: top*bottom + bottom*top (per 32-bit element)
 *
 * **Description**:\n
 * For the `KMDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
 * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-
 * bit elements of Rs2.
 * For the `KMXDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the top 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
 * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of the
 * 32-bit elements of Rs2.
 * The addition result is checked for saturation. If saturation happens, the result is saturated to 2^31-1.
 * The final results are written to Rd. The 16-bit contents are treated as signed integers.
* * **Operations**:\n * ~~~ * if Rs1.W[x] != 0x80008000) or (Rs2.W[x] != 0x80008000 { // KMDA Rd.W[x] = Rs1.W[x].H[1] * * Rs2.W[x].H[1]) + (Rs1.W[x].H[0] * Rs2.W[x].H[0]; // KMXDA Rd.W[x] = Rs1.W[x].H[1] * Rs2.W[x].H[0]) * + (Rs1.W[x].H[0] * Rs2.W[x].H[1]; } else { Rd.W[x] = 0x7fffffff; OV = 1; } for RV32: x=0 for RV64: * x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KMDA(unsigned long a, unsigned long b) { long result; __ASM volatile("kmda %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.42.1. KMDA ===== */ /* ===== Inline Function Start for 3.42.2. KMXDA ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB * \brief KMXDA (SIMD Signed Crossed Multiply Two Halfs and Add) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * KMDA Rd, Rs1, Rs2 * KMXDA Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then * adds the two 32-bit results together. The addition result may be saturated. * * KMDA: top*top + bottom*bottom (per 32-bit element) * * KMXDA: top*bottom + bottom*top (per 32-bit element) * * **Description**:\n * For the `KMDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32- * bit elements of Rs2. * For the `KMXDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 * with the top 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of the * 32-bit elements of Rs2. 
* The addition result is checked for saturation. If saturation happens, the result is saturated to 2^31-1. * The final results are written to Rd. The 16-bit contents are treated as signed integers. * * **Operations**:\n * ~~~ * if Rs1.W[x] != 0x80008000) or (Rs2.W[x] != 0x80008000 { // KMDA Rd.W[x] = Rs1.W[x].H[1] * * Rs2.W[x].H[1]) + (Rs1.W[x].H[0] * Rs2.W[x].H[0]; // KMXDA Rd.W[x] = Rs1.W[x].H[1] * Rs2.W[x].H[0]) * + (Rs1.W[x].H[0] * Rs2.W[x].H[1]; } else { Rd.W[x] = 0x7fffffff; OV = 1; } for RV32: x=0 for RV64: * x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KMXDA(unsigned long a, unsigned long b) { long result; __ASM volatile("kmxda %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.42.2. KMXDA ===== */ /* ===== Inline Function Start for 3.43.1. KMMAC ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC * \brief KMMAC (SIMD Saturating MSW Signed Multiply Word and Add) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * KMMAC Rd, Rs1, Rs2 * KMMAC.u Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the signed 32-bit integer elements of two registers and add the most significant * 32-bit results with the signed 32-bit integer elements of a third register. The addition results are * saturated first and then written back to the third register. The `.u` form performs an additional * rounding up operation on the multiplication results before adding the most significant 32-bit part * of the results. * * **Description**:\n * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2 * and adds the most significant 32-bit multiplication results with the signed 32-bit elements of Rd. 
If * the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range * and the OV bit is set to 1. The results after saturation are written to Rd. The `.u` form of the * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by * adding a 1 to bit 31 of the results. * * **Operations**:\n * ~~~ * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x]; * if (`.u` form) { * Round[x][32:0] = Mres[x][63:31] + 1; * res[x] = Rd.W[x] + Round[x][32:1]; * } else { * res[x] = Rd.W[x] + Mres[x][63:32]; * } * if (res[x] > (2^31)-1) { * res[x] = (2^31)-1; * OV = 1; * } else if (res[x] < -2^31) { * res[x] = -2^31; * OV = 1; * } * Rd.W[x] = res[x]; * for RV32: x=0 * for RV64: x=1...0 * ~~~ * * \param [in] t long type of value stored in t * \param [in] a long type of value stored in a * \param [in] b long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KMMAC(long t, long a, long b) { __ASM volatile("kmmac %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 3.43.1. KMMAC ===== */ /* ===== Inline Function Start for 3.43.2. KMMAC.u ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC * \brief KMMAC.u (SIMD Saturating MSW Signed Multiply Word and Add with Rounding) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * KMMAC Rd, Rs1, Rs2 * KMMAC.u Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the signed 32-bit integer elements of two registers and add the most significant * 32-bit results with the signed 32-bit integer elements of a third register. The addition results are * saturated first and then written back to the third register. The `.u` form performs an additional * rounding up operation on the multiplication results before adding the most significant 32-bit part * of the results. 
* * **Description**:\n * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2 * and adds the most significant 32-bit multiplication results with the signed 32-bit elements of Rd. If * the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range * and the OV bit is set to 1. The results after saturation are written to Rd. The `.u` form of the * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by * adding a 1 to bit 31 of the results. * * **Operations**:\n * ~~~ * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x]; * if (`.u` form) { * Round[x][32:0] = Mres[x][63:31] + 1; * res[x] = Rd.W[x] + Round[x][32:1]; * } else { * res[x] = Rd.W[x] + Mres[x][63:32]; * } * if (res[x] > (2^31)-1) { * res[x] = (2^31)-1; * OV = 1; * } else if (res[x] < -2^31) { * res[x] = -2^31; * OV = 1; * } * Rd.W[x] = res[x]; * for RV32: x=0 * for RV64: x=1...0 * ~~~ * * \param [in] t long type of value stored in t * \param [in] a long type of value stored in a * \param [in] b long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KMMAC_U(long t, long a, long b) { __ASM volatile("kmmac.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 3.43.2. KMMAC.u ===== */ /* ===== Inline Function Start for 3.44.1. KMMAWB ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC * \brief KMMAWB (SIMD Saturating MSW Signed Multiply Word and Bottom Half and Add) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * KMMAWB Rd, Rs1, Rs2 * KMMAWB.u Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the * corresponding 32-bit elements of another register and add the most significant 32-bit results with * the corresponding signed 32-bit elements of a third register. 
The addition result is written to the * corresponding 32-bit elements of the third register. The `.u` form rounds up the multiplication * results from the most significant discarded bit before the addition operations. * * **Description**:\n * This instruction multiplies the signed 32-bit elements of Rs1 with the signed bottom 16-bit content * of the corresponding 32-bit elements of Rs2 and adds the most significant 32-bit multiplication * results with the corresponding signed 32-bit elements of Rd. If the addition result is beyond the Q31 * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The results * after saturation are written to the corresponding 32-bit elements of Rd. The `.u` form of the * instruction rounds up the most significant 32-bit of the 48-bit multiplication results by adding a 1 to * bit 15 of the result before the addition operations. * * **Operations**:\n * ~~~ * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[0]; * if (`.u` form) { * Round[x][32:0] = Mres[x][47:15] + 1; * res[x] = Rd.W[x] + Round[x][32:1]; * } else { * res[x] = Rd.W[x] + Mres[x][47:16]; * } * if (res[x] > (2^31)-1) { * res[x] = (2^31)-1; * OV = 1; * } else if (res[x] < -2^31) { * res[x] = -2^31; * OV = 1; * } * Rd.W[x] = res[x]; * for RV32: x=0 * for RV64: x=1...0 * ~~~ * * \param [in] t long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KMMAWB(long t, unsigned long a, unsigned long b) { __ASM volatile("kmmawb %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 3.44.1. KMMAWB ===== */ /* ===== Inline Function Start for 3.44.2. 
KMMAWB.u ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC * \brief KMMAWB.u (SIMD Saturating MSW Signed Multiply Word and Bottom Half and Add with Rounding) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * KMMAWB Rd, Rs1, Rs2 * KMMAWB.u Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the * corresponding 32-bit elements of another register and add the most significant 32-bit results with * the corresponding signed 32-bit elements of a third register. The addition result is written to the * corresponding 32-bit elements of the third register. The `.u` form rounds up the multiplication * results from the most significant discarded bit before the addition operations. * * **Description**:\n * This instruction multiplies the signed 32-bit elements of Rs1 with the signed bottom 16-bit content * of the corresponding 32-bit elements of Rs2 and adds the most significant 32-bit multiplication * results with the corresponding signed 32-bit elements of Rd. If the addition result is beyond the Q31 * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The results * after saturation are written to the corresponding 32-bit elements of Rd. The `.u` form of the * instruction rounds up the most significant 32-bit of the 48-bit multiplication results by adding a 1 to * bit 15 of the result before the addition operations. 
* * **Operations**:\n * ~~~ * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[0]; * if (`.u` form) { * Round[x][32:0] = Mres[x][47:15] + 1; * res[x] = Rd.W[x] + Round[x][32:1]; * } else { * res[x] = Rd.W[x] + Mres[x][47:16]; * } * if (res[x] > (2^31)-1) { * res[x] = (2^31)-1; * OV = 1; * } else if (res[x] < -2^31) { * res[x] = -2^31; * OV = 1; * } * Rd.W[x] = res[x]; * for RV32: x=0 * for RV64: x=1...0 * ~~~ * * \param [in] t long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KMMAWB_U(long t, unsigned long a, unsigned long b) { __ASM volatile("kmmawb.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 3.44.2. KMMAWB.u ===== */ /* ===== Inline Function Start for 3.45.1. KMMAWB2 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC * \brief KMMAWB2 (SIMD Saturating MSW Signed Multiply Word and Bottom Half & 2 and Add) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * KMMAWB2 Rd, Rs1, Rs2 * KMMAWB2.u Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the signed 32-bit elements of one register and the bottom 16-bit of the * corresponding 32-bit elements of another register, double the multiplication results and add the * saturated most significant 32-bit results with the corresponding signed 32-bit elements of a third * register. The saturated addition result is written to the corresponding 32-bit elements of the third * register. The `.u` form rounds up the multiplication results from the most significant discarded bit * before the addition operations. 
* * **Description**:\n * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed bottom 16-bit Q15 * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and * adds the saturated most significant 32-bit Q31 multiplication results with the corresponding signed * 32-bit elements of Rd. If the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is * saturated to the range and the OV bit is set to 1. The results after saturation are written to the * corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the most significant * 32-bit of the 48-bit Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of * the result before the addition operations. * * **Operations**:\n * ~~~ * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[0] == 0x8000)) { * addop.W[x] = 0x7fffffff; * OV = 1; * } else { * Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[0]; * if (`.u` form) { * Mres[x][47:14] = Mres[x][47:14] + 1; * } * addop.W[x] = Mres[x][46:15]; // doubling * } * res[x] = Rd.W[x] + addop.W[x]; * if (res[x] > (2^31)-1) { * res[x] = (2^31)-1; * OV = 1; * } else if (res[x] < -2^31) { * res[x] = -2^31; * OV = 1; * } * Rd.W[x] = res[x]; * for RV32: x=0 * for RV64: x=1...0 * ~~~ * * \param [in] t long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KMMAWB2(long t, unsigned long a, unsigned long b) { __ASM volatile("kmmawb2 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 3.45.1. KMMAWB2 ===== */ /* ===== Inline Function Start for 3.45.2. 
KMMAWB2.u ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC * \brief KMMAWB2.u (SIMD Saturating MSW Signed Multiply Word and Bottom Half & 2 and Add with Rounding) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * KMMAWB2 Rd, Rs1, Rs2 * KMMAWB2.u Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the signed 32-bit elements of one register and the bottom 16-bit of the * corresponding 32-bit elements of another register, double the multiplication results and add the * saturated most significant 32-bit results with the corresponding signed 32-bit elements of a third * register. The saturated addition result is written to the corresponding 32-bit elements of the third * register. The `.u` form rounds up the multiplication results from the most significant discarded bit * before the addition operations. * * **Description**:\n * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed bottom 16-bit Q15 * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and * adds the saturated most significant 32-bit Q31 multiplication results with the corresponding signed * 32-bit elements of Rd. If the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is * saturated to the range and the OV bit is set to 1. The results after saturation are written to the * corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the most significant * 32-bit of the 48-bit Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of * the result before the addition operations. 
* * **Operations**:\n * ~~~ * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[0] == 0x8000)) { * addop.W[x] = 0x7fffffff; * OV = 1; * } else { * Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[0]; * if (`.u` form) { * Mres[x][47:14] = Mres[x][47:14] + 1; * } * addop.W[x] = Mres[x][46:15]; // doubling * } * res[x] = Rd.W[x] + addop.W[x]; * if (res[x] > (2^31)-1) { * res[x] = (2^31)-1; * OV = 1; * } else if (res[x] < -2^31) { * res[x] = -2^31; * OV = 1; * } * Rd.W[x] = res[x]; * for RV32: x=0 * for RV64: x=1...0 * ~~~ * * \param [in] t long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KMMAWB2_U(long t, unsigned long a, unsigned long b) { __ASM volatile("kmmawb2.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 3.45.2. KMMAWB2.u ===== */ /* ===== Inline Function Start for 3.46.1. KMMAWT ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC * \brief KMMAWT (SIMD Saturating MSW Signed Multiply Word and Top Half and Add) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * KMMAWT Rd, Rs1, Rs2 * KMMAWT.u Rd Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the signed 32-bit integer elements of one register and the signed top 16-bit of the * corresponding 32-bit elements of another register and add the most significant 32-bit results with * the corresponding signed 32-bit elements of a third register. The addition results are written to the * corresponding 32-bit elements of the third register. The `.u` form rounds up the multiplication * results from the most significant discarded bit before the addition operations. 
* * **Description**:\n * This instruction multiplies the signed 32-bit elements of Rs1 with the signed top 16-bit of the * corresponding 32-bit elements of Rs2 and adds the most significant 32-bit multiplication results * with the corresponding signed 32-bit elements of Rd. If the addition result is beyond the Q31 * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The results * after saturation are written to the corresponding 32-bit elements of Rd. The `.u` form of the * instruction rounds up the most significant 32-bit of the 48-bit multiplication results by adding a 1 to * bit 15 of the result before the addition operations. * * **Operations**:\n * ~~~ * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[1]; * if (`.u` form) { * Round[x][32:0] = Mres[x][47:15] + 1; * res[x] = Rd.W[x] + Round[x][32:1]; * } else { * res[x] = Rd.W[x] + Mres[x][47:16]; * } * if (res[x] > (2^31)-1) { * res[x] = (2^31)-1; * OV = 1; * } else if (res[x] < -2^31) { * res[x] = -2^31; * OV = 1; * } * Rd.W[x] = res[x]; * for RV32: x=0 * for RV64: x=1...0 * ~~~ * * \param [in] t long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KMMAWT(long t, unsigned long a, unsigned long b) { __ASM volatile("kmmawt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 3.46.1. KMMAWT ===== */ /* ===== Inline Function Start for 3.46.2. 
KMMAWT.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
 * \brief KMMAWT.u (SIMD Saturating MSW Signed Multiply Word and Top Half and Add with Rounding)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMMAWT Rd, Rs1, Rs2
 * KMMAWT.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit integer elements of one register and the signed top 16-bit of the
 * corresponding 32-bit elements of another register and add the most significant 32-bit results with
 * the corresponding signed 32-bit elements of a third register. The addition results are written to the
 * corresponding 32-bit elements of the third register. The `.u` form rounds up the multiplication
 * results from the most significant discarded bit before the addition operations.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit elements of Rs1 with the signed top 16-bit of the
 * corresponding 32-bit elements of Rs2 and adds the most significant 32-bit multiplication results
 * with the corresponding signed 32-bit elements of Rd. If the addition result is beyond the Q31
 * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The results
 * after saturation are written to the corresponding 32-bit elements of Rd. The `.u` form of the
 * instruction rounds up the most significant 32-bit of the 48-bit multiplication results by adding a 1 to
 * bit 15 of the result before the addition operations.
* * **Operations**:\n * ~~~ * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[1]; * if (`.u` form) { * Round[x][32:0] = Mres[x][47:15] + 1; * res[x] = Rd.W[x] + Round[x][32:1]; * } else { * res[x] = Rd.W[x] + Mres[x][47:16]; * } * if (res[x] > (2^31)-1) { * res[x] = (2^31)-1; * OV = 1; * } else if (res[x] < -2^31) { * res[x] = -2^31; * OV = 1; * } * Rd.W[x] = res[x]; * for RV32: x=0 * for RV64: x=1...0 * ~~~ * * \param [in] t long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KMMAWT_U(long t, unsigned long a, unsigned long b) { __ASM volatile("kmmawt.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 3.46.2. KMMAWT.u ===== */ /* ===== Inline Function Start for 3.47.1. KMMAWT2 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC * \brief KMMAWT2 (SIMD Saturating MSW Signed Multiply Word and Top Half & 2 and Add) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * KMMAWT2 Rd, Rs1, Rs2 * KMMAWT2.u Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the signed 32-bit elements of one register and the top 16-bit of the * corresponding 32-bit elements of another register, double the multiplication results and add the * saturated most significant 32-bit results with the corresponding signed 32-bit elements of a third * register. The saturated addition result is written to the corresponding 32-bit elements of the third * register. The `.u` form rounds up the multiplication results from the most significant discarded bit * before the addition operations. 
* * **Description**:\n * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed top 16-bit Q15 * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and * adds the saturated most significant 32-bit Q31 multiplication results with the corresponding signed * 32-bit elements of Rd. If the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is * saturated to the range and the OV bit is set to 1. The results after saturation are written to the * corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the most significant * 32-bit of the 48-bit Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of * the result before the addition operations. * * **Operations**:\n * ~~~ * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[1] == 0x8000)) { * addop.W[x] = 0x7fffffff; * OV = 1; * } else { * Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[1]; * if (`.u` form) { * Mres[x][47:14] = Mres[x][47:14] + 1; * } * addop.W[x] = Mres[x][46:15]; // doubling * } * res[x] = Rd.W[x] + addop.W[x]; * if (res[x] > (2^31)-1) { * res[x] = (2^31)-1; * OV = 1; * } else if (res[x] < -2^31) { * res[x] = -2^31; * OV = 1; * } * Rd.W[x] = res[x]; * for RV32: x=0 * for RV64: x=1...0 * ~~~ * * \param [in] t long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KMMAWT2(long t, unsigned long a, unsigned long b) { __ASM volatile("kmmawt2 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 3.47.1. KMMAWT2 ===== */ /* ===== Inline Function Start for 3.47.2. 
KMMAWT2.u ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC * \brief KMMAWT2.u (SIMD Saturating MSW Signed Multiply Word and Top Half & 2 and Add with Rounding) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * KMMAWT2 Rd, Rs1, Rs2 * KMMAWT2.u Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the signed 32-bit elements of one register and the top 16-bit of the * corresponding 32-bit elements of another register, double the multiplication results and add the * saturated most significant 32-bit results with the corresponding signed 32-bit elements of a third * register. The saturated addition result is written to the corresponding 32-bit elements of the third * register. The `.u` form rounds up the multiplication results from the most significant discarded bit * before the addition operations. * * **Description**:\n * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed top 16-bit Q15 * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and * adds the saturated most significant 32-bit Q31 multiplication results with the corresponding signed * 32-bit elements of Rd. If the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is * saturated to the range and the OV bit is set to 1. The results after saturation are written to the * corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the most significant * 32-bit of the 48-bit Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of * the result before the addition operations. 
* * **Operations**:\n * ~~~ * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[1] == 0x8000)) { * addop.W[x] = 0x7fffffff; * OV = 1; * } else { * Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[1]; * if (`.u` form) { * Mres[x][47:14] = Mres[x][47:14] + 1; * } * addop.W[x] = Mres[x][46:15]; // doubling * } * res[x] = Rd.W[x] + addop.W[x]; * if (res[x] > (2^31)-1) { * res[x] = (2^31)-1; * OV = 1; * } else if (res[x] < -2^31) { * res[x] = -2^31; * OV = 1; * } * Rd.W[x] = res[x]; * for RV32: x=0 * for RV64: x=1...0 * ~~~ * * \param [in] t long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KMMAWT2_U(long t, unsigned long a, unsigned long b) { __ASM volatile("kmmawt2.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 3.47.2. KMMAWT2.u ===== */ /* ===== Inline Function Start for 3.48.1. KMMSB ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC * \brief KMMSB (SIMD Saturating MSW Signed Multiply Word and Subtract) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * KMMSB Rd, Rs1, Rs2 * KMMSB.u Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the signed 32-bit integer elements of two registers and subtract the most * significant 32-bit results from the signed 32-bit elements of a third register. The subtraction results * are written to the third register. The `.u` form performs an additional rounding up operation on * the multiplication results before subtracting the most significant 32-bit part of the results. * * **Description**:\n * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2 * and subtracts the most significant 32-bit multiplication results from the signed 32-bit elements of * Rd. 
If the subtraction result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the * range and the OV bit is set to 1. The results after saturation are written to Rd. The `.u` form of the * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by * adding a 1 to bit 31 of the results. * * **Operations**:\n * ~~~ * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x]; * if (`.u` form) { * Round[x][32:0] = Mres[x][63:31] + 1; * res[x] = Rd.W[x] - Round[x][32:1]; * } else { * res[x] = Rd.W[x] - Mres[x][63:32]; * } * if (res[x] > (2^31)-1) { * res[x] = (2^31)-1; * OV = 1; * } else if (res[x] < -2^31) { * res[x] = -2^31; * OV = 1; * } * Rd.W[x] = res[x]; * for RV32: x=0 * for RV64: x=1...0 * ~~~ * * \param [in] t long type of value stored in t * \param [in] a long type of value stored in a * \param [in] b long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KMMSB(long t, long a, long b) { __ASM volatile("kmmsb %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 3.48.1. KMMSB ===== */ /* ===== Inline Function Start for 3.48.2. KMMSB.u ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC * \brief KMMSB.u (SIMD Saturating MSW Signed Multiply Word and Subtraction with Rounding) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * KMMSB Rd, Rs1, Rs2 * KMMSB.u Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the signed 32-bit integer elements of two registers and subtract the most * significant 32-bit results from the signed 32-bit elements of a third register. The subtraction results * are written to the third register. The `.u` form performs an additional rounding up operation on * the multiplication results before subtracting the most significant 32-bit part of the results. 
* * **Description**:\n * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2 * and subtracts the most significant 32-bit multiplication results from the signed 32-bit elements of * Rd. If the subtraction result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the * range and the OV bit is set to 1. The results after saturation are written to Rd. The `.u` form of the * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by * adding a 1 to bit 31 of the results. * * **Operations**:\n * ~~~ * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x]; * if (`.u` form) { * Round[x][32:0] = Mres[x][63:31] + 1; * res[x] = Rd.W[x] - Round[x][32:1]; * } else { * res[x] = Rd.W[x] - Mres[x][63:32]; * } * if (res[x] > (2^31)-1) { * res[x] = (2^31)-1; * OV = 1; * } else if (res[x] < -2^31) { * res[x] = -2^31; * OV = 1; * } * Rd.W[x] = res[x]; * for RV32: x=0 * for RV64: x=1...0 * ~~~ * * \param [in] t long type of value stored in t * \param [in] a long type of value stored in a * \param [in] b long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KMMSB_U(long t, long a, long b) { __ASM volatile("kmmsb.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 3.48.2. KMMSB.u ===== */ /* ===== Inline Function Start for 3.49.1. KMMWB2 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC * \brief KMMWB2 (SIMD Saturating MSW Signed Multiply Word and Bottom Half & 2) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * KMMWB2 Rd, Rs1, Rs2 * KMMWB2.u Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the * corresponding 32-bit elements of another register, double the multiplication results and write the * saturated most significant 32-bit results to the corresponding 32-bit elements of a register. 
The `.u` * form rounds up the results from the most significant discarded bit. * * **Description**:\n * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed bottom 16-bit Q15 * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and * writes the saturated most significant 32-bit Q31 multiplication results to the corresponding 32-bit * elements of Rd. The `.u` form of the instruction rounds up the most significant 32-bit of the 48-bit * Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of the results. * * **Operations**:\n * ~~~ * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[0] == 0x8000)) { * Rd.W[x] = 0x7fffffff; * OV = 1; * } else { * Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[0]; * if (`.u` form) { * Round[x][32:0] = Mres[x][46:14] + 1; * Rd.W[x] = Round[x][32:1]; * } else { * Rd.W[x] = Mres[x][46:15]; * } * } * for RV32: x=0 * for RV64: x=1...0 * ~~~ * * \param [in] a long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KMMWB2(long a, unsigned long b) { long result; __ASM volatile("kmmwb2 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.49.1. KMMWB2 ===== */ /* ===== Inline Function Start for 3.49.2. KMMWB2.u ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC * \brief KMMWB2.u (SIMD Saturating MSW Signed Multiply Word and Bottom Half & 2 with Rounding) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * KMMWB2 Rd, Rs1, Rs2 * KMMWB2.u Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the * corresponding 32-bit elements of another register, double the multiplication results and write the * saturated most significant 32-bit results to the corresponding 32-bit elements of a register. 
The `.u` * form rounds up the results from the most significant discarded bit. * * **Description**:\n * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed bottom 16-bit Q15 * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and * writes the saturated most significant 32-bit Q31 multiplication results to the corresponding 32-bit * elements of Rd. The `.u` form of the instruction rounds up the most significant 32-bit of the 48-bit * Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of the results. * * **Operations**:\n * ~~~ * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[0] == 0x8000)) { * Rd.W[x] = 0x7fffffff; * OV = 1; * } else { * Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[0]; * if (`.u` form) { * Round[x][32:0] = Mres[x][46:14] + 1; * Rd.W[x] = Round[x][32:1]; * } else { * Rd.W[x] = Mres[x][46:15]; * } * } * for RV32: x=0 * for RV64: x=1...0 * ~~~ * * \param [in] a long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KMMWB2_U(long a, unsigned long b) { long result; __ASM volatile("kmmwb2.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.49.2. KMMWB2.u ===== */ /* ===== Inline Function Start for 3.50.1. KMMWT2 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC * \brief KMMWT2 (SIMD Saturating MSW Signed Multiply Word and Top Half & 2) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * KMMWT2 Rd, Rs1, Rs2 * KMMWT2.u Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the signed 32-bit integer elements of one register and the top 16-bit of the * corresponding 32-bit elements of another register, double the multiplication results and write the * saturated most significant 32-bit results to the corresponding 32-bit elements of a register. 
The `.u` * form rounds up the results from the most significant discarded bit. * * **Description**:\n * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed top 16-bit Q15 * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and * writes the saturated most significant 32-bit Q31 multiplication results to the corresponding 32-bit * elements of Rd. The `.u` form of the instruction rounds up the most significant 32-bit of the 48-bit * Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of the results. * * **Operations**:\n * ~~~ * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[1] == 0x8000)) { * Rd.W[x] = 0x7fffffff; * OV = 1; * } else { * Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[1]; * if (`.u` form) { * Round[x][32:0] = Mres[x][46:14] + 1; * Rd.W[x] = Round[x][32:1]; * } else { * Rd.W[x] = Mres[x][46:15]; * } * } * for RV32: x=0 * for RV64: x=1...0 * ~~~ * * \param [in] a long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KMMWT2(long a, unsigned long b) { long result; __ASM volatile("kmmwt2 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.50.1. KMMWT2 ===== */ /* ===== Inline Function Start for 3.50.2. KMMWT2.u ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC * \brief KMMWT2.u (SIMD Saturating MSW Signed Multiply Word and Top Half & 2 with Rounding) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * KMMWT2 Rd, Rs1, Rs2 * KMMWT2.u Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the signed 32-bit integer elements of one register and the top 16-bit of the * corresponding 32-bit elements of another register, double the multiplication results and write the * saturated most significant 32-bit results to the corresponding 32-bit elements of a register. 
The `.u` * form rounds up the results from the most significant discarded bit. * * **Description**:\n * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed top 16-bit Q15 * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and * writes the saturated most significant 32-bit Q31 multiplication results to the corresponding 32-bit * elements of Rd. The `.u` form of the instruction rounds up the most significant 32-bit of the 48-bit * Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of the results. * * **Operations**:\n * ~~~ * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[1] == 0x8000)) { * Rd.W[x] = 0x7fffffff; * OV = 1; * } else { * Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[1]; * if (`.u` form) { * Round[x][32:0] = Mres[x][46:14] + 1; * Rd.W[x] = Round[x][32:1]; * } else { * Rd.W[x] = Mres[x][46:15]; * } * } * for RV32: x=0 * for RV64: x=1...0 * ~~~ * * \param [in] a long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KMMWT2_U(long a, unsigned long b) { long result; __ASM volatile("kmmwt2.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.50.2. KMMWT2.u ===== */ /* ===== Inline Function Start for 3.51.1. KMSDA ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB * \brief KMSDA (SIMD Saturating Signed Multiply Two Halfs & Add & Subtract) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * KMSDA Rd, Rs1, Rs2 * KMSXDA Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then * subtracts the two 32-bit results from the corresponding 32-bit elements of a third register. The * subtraction result may be saturated. 
* * KMSDA: rd.W[x] - top*top - bottom*bottom (per 32-bit element) * * KMSXDA: rd.W[x] - top*bottom - bottom*top (per 32-bit element) * * **Description**:\n * For the `KMSDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 * with the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2. * For the `KMSXDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 * with the top 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of the * 32-bit elements of Rs1 with the bottom 16-bit content of the 32-bit elements of Rs2. * The two 32-bit multiplication results are then subtracted from the content of the corresponding 32- * bit elements of Rd. If the subtraction result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is * saturated to the range and the OV bit is set to 1. The results after saturation are written to Rd. The * 16-bit contents are treated as signed integers. * * **Operations**:\n * ~~~ * // KMSDA * res[x] = Rd.W[x] - (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]); * // KMSXDA * res[x] = Rd.W[x] - (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]); * if (res[x] > (2^31)-1) { * res[x] = (2^31)-1; * OV = 1; * } else if (res[x] < -2^31) { * res[x] = -2^31; * OV = 1; * } * Rd.W[x] = res[x]; * for RV32: x=0 * for RV64: x=1...0 * ~~~ * * \param [in] t long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KMSDA(long t, unsigned long a, unsigned long b) { __ASM volatile("kmsda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 3.51.1. KMSDA ===== */ /* ===== Inline Function Start for 3.51.2. 
KMSXDA ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB * \brief KMSXDA (SIMD Saturating Signed Crossed Multiply Two Halfs & Add & Subtract) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * KMSDA Rd, Rs1, Rs2 * KMSXDA Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then * subtracts the two 32-bit results from the corresponding 32-bit elements of a third register. The * subtraction result may be saturated. * * KMSDA: rd.W[x] - top*top - bottom*bottom (per 32-bit element) * * KMSXDA: rd.W[x] - top*bottom - bottom*top (per 32-bit element) * * **Description**:\n * For the `KMSDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 * with the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2. * For the `KMSXDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 * with the top 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of the * 32-bit elements of Rs1 with the bottom 16-bit content of the 32-bit elements of Rs2. * The two 32-bit multiplication results are then subtracted from the content of the corresponding 32- * bit elements of Rd. If the subtraction result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is * saturated to the range and the OV bit is set to 1. The results after saturation are written to Rd. The * 16-bit contents are treated as signed integers. 
* * **Operations**:\n * ~~~ * // KMSDA * res[x] = Rd.W[x] - (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]); * // KMSXDA * res[x] = Rd.W[x] - (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]); * if (res[x] > (2^31)-1) { * res[x] = (2^31)-1; * OV = 1; * } else if (res[x] < -2^31) { * res[x] = -2^31; * OV = 1; * } * Rd.W[x] = res[x]; * for RV32: x=0 * for RV64: x=1...0 * ~~~ * * \param [in] t long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KMSXDA(long t, unsigned long a, unsigned long b) { __ASM volatile("kmsxda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 3.51.2. KMSXDA ===== */ /* ===== Inline Function Start for 3.52. KMSR64 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB * \brief KMSR64 (Signed Multiply and Saturating Subtract from 64-Bit Data) * \details * **Type**: DSP (64-bit Profile) * * **Syntax**:\n * ~~~ * KMSR64 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the 32-bit signed elements in two registers and subtract the 64-bit multiplication * results from the 64-bit signed data of a pair of registers (RV32) or a register (RV64). The result is * saturated to the Q63 range and written back to the pair of registers (RV32) or the register (RV64). * * **RV32 Description**:\n * This instruction multiplies the 32-bit signed data of Rs1 with that of Rs2. It * subtracts the 64-bit multiplication result from the 64-bit signed data of an even/odd pair of registers * specified by Rd(4,1) with unlimited precision. If the 64-bit subtraction result is beyond the Q63 * number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The saturated * result is written back to the even/odd pair of registers specified by Rd(4,1). 
* Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair * includes register 2d and 2d+1. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register * of the pair contains the low 32-bit of the result. * * **RV64 Description**:\n * This instruction multiplies the 32-bit signed elements of Rs1 with that of Rs2. It * subtracts the 64-bit multiplication results from the 64-bit signed data in Rd with unlimited * precision. If the 64-bit subtraction result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is * saturated to the range and the OV bit is set to 1. The saturated result is written back to Rd. * * **Operations**:\n * ~~~ * RV32: * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1); * result = R[t_H].R[t_L] - (Rs1 * Rs2); * if (result > (2^63)-1) { * result = (2^63)-1; OV = 1; * } else if (result < -2^63) { * result = -2^63; OV = 1; * } * R[t_H].R[t_L] = result; * RV64: * // `result` has unlimited precision * result = Rd - (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); * if (result > (2^63)-1) { * result = (2^63)-1; OV = 1; * } else if (result < -2^63) { * result = -2^63; OV = 1; * } * Rd = result; * ~~~ * * \param [in] t long long type of value stored in t * \param [in] a long type of value stored in a * \param [in] b long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_KMSR64(long long t, long a, long b) { __ASM volatile("kmsr64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 3.52. KMSR64 ===== */ /* ===== Inline Function Start for 3.53. KSLLW ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU * \brief KSLLW (Saturating Shift Left Logical for Word) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * KSLLW Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do logical left shift operation with saturation on a 32-bit word. 
The shift amount is a * variable from a GPR. * * **Description**:\n * The first word data in Rs1 is left-shifted logically. The shifted out bits are filled with * zero and the shift amount is specified by the low-order 5-bits of the value in the Rs2 register. Any * shifted value greater than 2^31-1 is saturated to 2^31-1. Any shifted value smaller than -2^31 is saturated * to -2^31. And the saturated result is sign-extended and written to Rd. If any saturation is performed, * set OV bit to 1. * * **Operations**:\n * ~~~ * sa = Rs2[4:0]; * res[(31+sa):0] = Rs1.W[0] << sa; * if (res > (2^31)-1) { * res = 0x7fffffff; OV = 1; * } else if (res < -2^31) { * res = 0x80000000; OV = 1; * } * Rd[31:0] = res[31:0]; // RV32 * Rd[63:0] = SE(res[31:0]); // RV64 * ~~~ * * \param [in] a long type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KSLLW(long a, unsigned int b) { long result; __ASM volatile("ksllw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.53. KSLLW ===== */ /* ===== Inline Function Start for 3.54. KSLLIW ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU * \brief KSLLIW (Saturating Shift Left Logical Immediate for Word) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * KSLLIW Rd, Rs1, imm5u * ~~~ * * **Purpose**:\n * Do logical left shift operation with saturation on a 32-bit word. The shift amount is an * immediate value. * * **Description**:\n * The first word data in Rs1 is left-shifted logically. The shifted out bits are filled with * zero and the shift amount is specified by the imm5u constant. Any shifted value greater than 2^31-1 is * saturated to 2^31-1. Any shifted value smaller than -2^31 is saturated to -2^31. And the saturated result is * sign-extended and written to Rd. If any saturation is performed, set OV bit to 1. 
* * **Operations**:\n * ~~~ * sa = imm5u; * res[(31+sa):0] = Rs1.W[0] << sa; * if (res > (2^31)-1) { * res = 0x7fffffff; OV = 1; * } else if (res < -2^31) { * res = 0x80000000; OV = 1; * } * Rd[31:0] = res[31:0]; // RV32 * Rd[63:0] = SE(res[31:0]); // RV64 * ~~~ * * \param [in] a long type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in long type */ #define __RV_KSLLIW(a, b) \ ({ \ long result; \ long __a = (long)(a); \ __ASM volatile("kslliw %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ result; \ }) /* ===== Inline Function End for 3.54. KSLLIW ===== */ /* ===== Inline Function Start for 3.55. KSLL8 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT * \brief KSLL8 (SIMD 8-bit Saturating Shift Left Logical) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * KSLL8 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 8-bit elements logical left shift operations with saturation simultaneously. The shift * amount is a variable from a GPR. * * **Description**:\n * The 8-bit data elements in Rs1 are left-shifted logically. The shifted out bits are filled * with zero and the shift amount is specified by the low-order 3-bits of the value in the Rs2 register. * Any shifted value greater than 2^7-1 is saturated to 2^7-1. Any shifted value smaller than -2^7 is * saturated to -2^7. And the saturated results are written to Rd. If any saturation is performed, set OV * bit to 1. 
* * **Operations**:\n * ~~~ * sa = Rs2[2:0]; * if (sa != 0) { * res[(7+sa):0] = Rs1.B[x] << sa; * if (res > (2^7)-1) { * res = 0x7f; OV = 1; * } else if (res < -2^7) { * res = 0x80; OV = 1; * } * Rd.B[x] = res[7:0]; * } else { * Rd = Rs1; * } * for RV32: x=3...0, * for RV64: x=7...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_KSLL8(unsigned long a, unsigned int b) { unsigned long result; __ASM volatile("ksll8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.55. KSLL8 ===== */ /* ===== Inline Function Start for 3.56. KSLLI8 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT * \brief KSLLI8 (SIMD 8-bit Saturating Shift Left Logical Immediate) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * KSLLI8 Rd, Rs1, imm3u * ~~~ * * **Purpose**:\n * Do 8-bit elements logical left shift operations with saturation simultaneously. The shift * amount is an immediate value. * * **Description**:\n * The 8-bit data elements in Rs1 are left-shifted logically. The shifted out bits are filled * with zero and the shift amount is specified by the imm3u constant. Any shifted value greater than * 2^7-1 is saturated to 2^7-1. Any shifted value smaller than -2^7 is saturated to -2^7. And the saturated * results are written to Rd. If any saturation is performed, set OV bit to 1. 
* * **Operations**:\n * ~~~ * sa = imm3u[2:0]; * if (sa != 0) { * res[(7+sa):0] = Rs1.B[x] << sa; * if (res > (2^7)-1) { * res = 0x7f; OV = 1; * } else if (res < -2^7) { * res = 0x80; OV = 1; * } * Rd.B[x] = res[7:0]; * } else { * Rd = Rs1; * } * for RV32: x=3...0, * for RV64: x=7...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long type */ #define __RV_KSLLI8(a, b) \ ({ \ unsigned long result; \ unsigned long __a = (unsigned long)(a); \ __ASM volatile("kslli8 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ result; \ }) /* ===== Inline Function End for 3.56. KSLLI8 ===== */ /* ===== Inline Function Start for 3.57. KSLL16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT * \brief KSLL16 (SIMD 16-bit Saturating Shift Left Logical) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * KSLL16 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 16-bit elements logical left shift operations with saturation simultaneously. The shift * amount is a variable from a GPR. * * **Description**:\n * The 16-bit data elements in Rs1 are left-shifted logically. The shifted out bits are filled * with zero and the shift amount is specified by the low-order 4-bits of the value in the Rs2 register. * Any shifted value greater than 2^15-1 is saturated to 2^15-1. Any shifted value smaller than -2^15 is * saturated to -2^15. And the saturated results are written to Rd. If any saturation is performed, set OV * bit to 1. 
* * **Operations**:\n * ~~~ * sa = Rs2[3:0]; * if (sa != 0) { * res[(15+sa):0] = Rs1.H[x] << sa; * if (res > (2^15)-1) { * res = 0x7fff; OV = 1; * } else if (res < -2^15) { * res = 0x8000; OV = 1; * } * Rd.H[x] = res[15:0]; * } else { * Rd = Rs1; * } * for RV32: x=1...0, * for RV64: x=3...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_KSLL16(unsigned long a, unsigned int b) { unsigned long result; __ASM volatile("ksll16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.57. KSLL16 ===== */ /* ===== Inline Function Start for 3.58. KSLLI16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT * \brief KSLLI16 (SIMD 16-bit Saturating Shift Left Logical Immediate) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * KSLLI16 Rd, Rs1, imm4u * ~~~ * * **Purpose**:\n * Do 16-bit elements logical left shift operations with saturation simultaneously. The shift * amount is an immediate value. * * **Description**:\n * The 16-bit data elements in Rs1 are left-shifted logically. The shifted out bits are filled * with zero and the shift amount is specified by the imm4u constant. Any shifted value greater than * 2^15-1 is saturated to 2^15-1. Any shifted value smaller than -2^15 is saturated to -2^15. And the saturated * results are written to Rd. If any saturation is performed, set OV bit to 1. 
* * **Operations**:\n * ~~~ * sa = imm4u[3:0]; * if (sa != 0) { * res[(15+sa):0] = Rs1.H[x] << sa; * if (res > (2^15)-1) { * res = 0x7fff; OV = 1; * } else if (res < -2^15) { * res = 0x8000; OV = 1; * } * Rd.H[x] = res[15:0]; * } else { * Rd = Rs1; * } * for RV32: x=1...0, * for RV64: x=3...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long type */ #define __RV_KSLLI16(a, b) \ ({ \ unsigned long result; \ unsigned long __a = (unsigned long)(a); \ __ASM volatile("kslli16 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ result; \ }) /* ===== Inline Function End for 3.58. KSLLI16 ===== */ /* ===== Inline Function Start for 3.59.1. KSLRA8 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT * \brief KSLRA8 (SIMD 8-bit Shift Left Logical with Saturation or Shift Right Arithmetic) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * KSLRA8 Rd, Rs1, Rs2 * KSLRA8.u Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 8-bit elements logical left (positive) or arithmetic right (negative) shift operation with * Q7 saturation for the left shift. The `.u` form performs additional rounding up operations for the * right shift. * * **Description**:\n * The 8-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically * based on the value of Rs2[3:0]. Rs2[3:0] is in the signed range of [-2^3, 2^3-1]. A positive Rs2[3:0] means * logical left shift and a negative Rs2[3:0] means arithmetic right shift. The shift amount is the * absolute value of Rs2[3:0]. However, the behavior of `Rs2[3:0]==-2^3 (0x8)` is defined to be * equivalent to the behavior of `Rs2[3:0]==-(2^3-1) (0x9)`. * The left-shifted results are saturated to the 8-bit signed integer range of [-2^7, 2^7-1]. For the `.u` form * of the instruction, the right-shifted results are added a 1 to the most significant discarded bit * position for rounding effect. 
After the shift, saturation, or rounding, the final results are written to * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:4] will not affect * this instruction. * * **Operations**:\n * ~~~ * if (Rs2[3:0] < 0) { * sa = -Rs2[3:0]; * sa = (sa == 8)? 7 : sa; * if (`.u` form) { * res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1; * Rd.B[x] = res[7:0]; * } else { * Rd.B[x] = SE8(Rs1.B[x][7:sa]); * } * } else { * sa = Rs2[2:0]; * res[(7+sa):0] = Rs1.B[x] <<(logic) sa; * if (res > (2^7)-1) { * res[7:0] = 0x7f; OV = 1; * } else if (res < -2^7) { * res[7:0] = 0x80; OV = 1; * } * Rd.B[x] = res[7:0]; * } * for RV32: x=3...0, * for RV64: x=7...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b int type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_KSLRA8(unsigned long a, int b) { unsigned long result; __ASM volatile("kslra8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.59.1. KSLRA8 ===== */ /* ===== Inline Function Start for 3.59.2. KSLRA8.u ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT * \brief KSLRA8.u (SIMD 8-bit Shift Left Logical with Saturation or Rounding Shift Right Arithmetic) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * KSLRA8 Rd, Rs1, Rs2 * KSLRA8.u Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 8-bit elements logical left (positive) or arithmetic right (negative) shift operation with * Q7 saturation for the left shift. The `.u` form performs additional rounding up operations for the * right shift. * * **Description**:\n * The 8-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically * based on the value of Rs2[3:0]. Rs2[3:0] is in the signed range of [-2^3, 2^3-1]. A positive Rs2[3:0] means * logical left shift and a negative Rs2[3:0] means arithmetic right shift. The shift amount is the * absolute value of Rs2[3:0]. 
However, the behavior of `Rs2[3:0]==-2^3 (0x8)` is defined to be * equivalent to the behavior of `Rs2[3:0]==-(2^3-1) (0x9)`. * The left-shifted results are saturated to the 8-bit signed integer range of [-2^7, 2^7-1]. For the `.u` form * of the instruction, the right-shifted results are added a 1 to the most significant discarded bit * position for rounding effect. After the shift, saturation, or rounding, the final results are written to * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:4] will not affect * this instruction. * * **Operations**:\n * ~~~ * if (Rs2[3:0] < 0) { * sa = -Rs2[3:0]; * sa = (sa == 8)? 7 : sa; * if (`.u` form) { * res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1; * Rd.B[x] = res[7:0]; * } else { * Rd.B[x] = SE8(Rs1.B[x][7:sa]); * } * } else { * sa = Rs2[2:0]; * res[(7+sa):0] = Rs1.B[x] <<(logic) sa; * if (res > (2^7)-1) { * res[7:0] = 0x7f; OV = 1; * } else if (res < -2^7) { * res[7:0] = 0x80; OV = 1; * } * Rd.B[x] = res[7:0]; * } * for RV32: x=3...0, * for RV64: x=7...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b int type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_KSLRA8_U(unsigned long a, int b) { unsigned long result; __ASM volatile("kslra8.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.59.2. KSLRA8.u ===== */ /* ===== Inline Function Start for 3.60.1. KSLRA16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT * \brief KSLRA16 (SIMD 16-bit Shift Left Logical with Saturation or Shift Right Arithmetic) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * KSLRA16 Rd, Rs1, Rs2 * KSLRA16.u Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 16-bit elements logical left (positive) or arithmetic right (negative) shift operation with * Q15 saturation for the left shift. The `.u` form performs additional rounding up operations for the * right shift. 
* * **Description**:\n * The 16-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically * based on the value of Rs2[4:0]. Rs2[4:0] is in the signed range of [-2^4, 2^4-1]. A positive Rs2[4:0] means * logical left shift and a negative Rs2[4:0] means arithmetic right shift. The shift amount is the * absolute value of Rs2[4:0]. However, the behavior of `Rs2[4:0]==-2^4 (0x10)` is defined to be * equivalent to the behavior of `Rs2[4:0]==-(2^4-1) (0x11)`. * The left-shifted results are saturated to the 16-bit signed integer range of [-2^15, 2^15-1]. For the `.u` * form of the instruction, the right-shifted results are added a 1 to the most significant discarded bit * position for rounding effect. After the shift, saturation, or rounding, the final results are written to * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:5] will not affect * this instruction. * * **Operations**:\n * ~~~ * if (Rs2[4:0] < 0) { * sa = -Rs2[4:0]; * sa = (sa == 16)? 15 : sa; * if (`.u` form) { * res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1; * Rd.H[x] = res[15:0]; * } else { * Rd.H[x] = SE16(Rs1.H[x][15:sa]); * } * } else { * sa = Rs2[3:0]; * res[(15+sa):0] = Rs1.H[x] <<(logic) sa; * if (res > (2^15)-1) { * res[15:0] = 0x7fff; OV = 1; * } else if (res < -2^15) { * res[15:0] = 0x8000; OV = 1; * } * d.H[x] = res[15:0]; * } * for RV32: x=1...0, * for RV64: x=3...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b int type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_KSLRA16(unsigned long a, int b) { unsigned long result; __ASM volatile("kslra16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.60.1. KSLRA16 ===== */ /* ===== Inline Function Start for 3.60.2. 
KSLRA16.u ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT * \brief KSLRA16.u (SIMD 16-bit Shift Left Logical with Saturation or Rounding Shift Right Arithmetic) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * KSLRA16 Rd, Rs1, Rs2 * KSLRA16.u Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 16-bit elements logical left (positive) or arithmetic right (negative) shift operation with * Q15 saturation for the left shift. The `.u` form performs additional rounding up operations for the * right shift. * * **Description**:\n * The 16-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically * based on the value of Rs2[4:0]. Rs2[4:0] is in the signed range of [-2^4, 2^4-1]. A positive Rs2[4:0] means * logical left shift and a negative Rs2[4:0] means arithmetic right shift. The shift amount is the * absolute value of Rs2[4:0]. However, the behavior of `Rs2[4:0]==-2^4 (0x10)` is defined to be * equivalent to the behavior of `Rs2[4:0]==-(2^4-1) (0x11)`. * The left-shifted results are saturated to the 16-bit signed integer range of [-2^15, 2^15-1]. For the `.u` * form of the instruction, the right-shifted results are added a 1 to the most significant discarded bit * position for rounding effect. After the shift, saturation, or rounding, the final results are written to * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:5] will not affect * this instruction. * * **Operations**:\n * ~~~ * if (Rs2[4:0] < 0) { * sa = -Rs2[4:0]; * sa = (sa == 16)? 
15 : sa; * if (`.u` form) { * res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1; * Rd.H[x] = res[15:0]; * } else { * Rd.H[x] = SE16(Rs1.H[x][15:sa]); * } * } else { * sa = Rs2[3:0]; * res[(15+sa):0] = Rs1.H[x] <<(logic) sa; * if (res > (2^15)-1) { * res[15:0] = 0x7fff; OV = 1; * } else if (res < -2^15) { * res[15:0] = 0x8000; OV = 1; * } * d.H[x] = res[15:0]; * } * for RV32: x=1...0, * for RV64: x=3...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b int type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_KSLRA16_U(unsigned long a, int b) { unsigned long result; __ASM volatile("kslra16.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.60.2. KSLRA16.u ===== */ /* ===== Inline Function Start for 3.61. KSLRAW ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU * \brief KSLRAW (Shift Left Logical with Q31 Saturation or Shift Right Arithmetic) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * KSLRAW Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Perform a logical left (positive) or arithmetic right (negative) shift operation with Q31 * saturation for the left shift on a 32-bit data. * * **Description**:\n * The lower 32-bit content of Rs1 is left-shifted logically or right-shifted arithmetically * based on the value of Rs2[5:0]. Rs2[5:0] is in the signed range of [-25, 25-1]. A positive Rs2[5:0] means * logical left shift and a negative Rs2[5:0] means arithmetic right shift. The shift amount is the * absolute value of Rs2[5:0] clamped to the actual shift range of [0, 31]. * The left-shifted result is saturated to the 32-bit signed integer range of [-2^31, 2^31-1]. After the shift * operation, the final result is bit-31 sign-extended and written to Rd. If any saturation happens, this * instruction sets the OV flag. The value of Rs2[31:6] will not affected the operation of this instruction. 
* * **Operations**:\n * ~~~ * if (Rs2[5:0] < 0) { * sa = -Rs2[5:0]; * sa = (sa == 32)? 31 : sa; * res[31:0] = Rs1.W[0] >>(arith) sa; * } else { * sa = Rs2[5:0]; * tmp = Rs1.W[0] <<(logic) sa; * if (tmp > (2^31)-1) { * res[31:0] = (2^31)-1; * OV = 1; * } else if (tmp < -2^31) { * res[31:0] = -2^31; * OV = 1 * } else { * res[31:0] = tmp[31:0]; * } * } * Rd = res[31:0]; // RV32 * Rd = SE64(res[31:0]); // RV64 * ~~~ * * \param [in] a int type of value stored in a * \param [in] b int type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KSLRAW(int a, int b) { long result; __ASM volatile("kslraw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.61. KSLRAW ===== */ /* ===== Inline Function Start for 3.62. KSLRAW.u ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU * \brief KSLRAW.u (Shift Left Logical with Q31 Saturation or Rounding Shift Right Arithmetic) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * KSLRAW.u Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Perform a logical left (positive) or arithmetic right (negative) shift operation with Q31 * saturation for the left shift and a rounding up operation for the right shift on a 32-bit data. * * **Description**:\n * The lower 32-bit content of Rs1 is left-shifted logically or right-shifted arithmetically * based on the value of Rs2[5:0]. Rs2[5:0] is in the signed range of [-25, 25-1]. A positive Rs2[5:0] means * logical left shift and a negative Rs2[5:0] means arithmetic right shift. The shift amount is the * absolute value of Rs2[5:0] clamped to the actual shift range of [0, 31]. * The left-shifted result is saturated to the 32-bit signed integer range of [-2^31, 2^31-1]. The right-shifted * result is added a 1 to the most significant discarded bit position for rounding effect. After the shift, * saturation, or rounding, the final result is bit-31 sign-extended and written to Rd. 
If any saturation * happens, this instruction sets the OV flag. The value of Rs2[31:6] will not affect the operation of this * instruction. * * **Operations**:\n * ~~~ * if (Rs2[5:0] < 0) { * sa = -Rs2[5:0]; * sa = (sa == 32)? 31 : sa; * res[31:-1] = SE33(Rs1[31:(sa-1)]) + 1; * rst[31:0] = res[31:0]; * } else { * sa = Rs2[5:0]; * tmp = Rs1.W[0] <<(logic) sa; * if (tmp > (2^31)-1) { * rst[31:0] = (2^31)-1; * OV = 1; * } else if (tmp < -2^31) { * rst[31:0] = -2^31; * OV = 1 * } else { * rst[31:0] = tmp[31:0]; * } * } * Rd = rst[31:0]; // RV32 * Rd = SE64(rst[31:0]); // RV64 * ~~~ * * \param [in] a int type of value stored in a * \param [in] b int type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KSLRAW_U(int a, int b) { long result; __ASM volatile("kslraw.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.62. KSLRAW.u ===== */ /* ===== Inline Function Start for 3.63. KSTAS16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB * \brief KSTAS16 (SIMD 16-bit Signed Saturating Straight Addition & Subtraction) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * KSTAS16 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 16-bit signed integer element saturating addition and 16-bit signed integer element * saturating subtraction in a 32-bit chunk simultaneously. Operands are from corresponding * positions in 32-bit chunks. * * **Description**:\n * This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in * Rs1 with the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs2; at the same time, it * subtracts the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs2 from the 16-bit signed * integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the Q15 number * range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV bit is set to 1. 
The saturated * results are written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit chunks in Rd for * subtraction. * * **Operations**:\n * ~~~ * res1 = Rs1.W[x][31:16] + Rs2.W[x][31:16]; * res2 = Rs1.W[x][15:0] - Rs2.W[x][15:0]; * for (res in [res1, res2]) { * if (res > (2^15)-1) { * res = (2^15)-1; * OV = 1; * } else if (res < -2^15) { * res = -2^15; * OV = 1; * } * } * Rd.W[x][31:16] = res1; * Rd.W[x][15:0] = res2; * for RV32, x=0 * for RV64, x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_KSTAS16(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("kstas16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.63. KSTAS16 ===== */ /* ===== Inline Function Start for 3.64. KSTSA16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB * \brief KSTSA16 (SIMD 16-bit Signed Saturating Straight Subtraction & Addition) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * KSTSA16 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 16-bit signed integer element saturating subtraction and 16-bit signed integer element * saturating addition in a 32-bit chunk simultaneously. Operands are from corresponding positions in * 32-bit chunks. * * **Description**:\n * This instruction subtracts the 16-bit signed integer element in [31:16] of 32-bit chunks * in Rs2 from the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1; at the same time, it * adds the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs2 with the 16-bit signed integer * element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the Q15 number range (-2^15 * <= Q15 <= 2^15-1), they are saturated to the range and the OV bit is set to 1. 
The saturated results are * written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of 32-bit chunks in Rd for * addition. * * **Operations**:\n * ~~~ * res1 = Rs1.W[x][31:16] - Rs2.W[x][31:16]; * res2 = Rs1.W[x][15:0] + Rs2.W[x][15:0]; * for (res in [res1, res2]) { * if (res > (2^15)-1) { * res = (2^15)-1; * OV = 1; * } else if (res < -2^15) { * res = -2^15; * OV = 1; * } * } * Rd.W[x][31:16] = res1; * Rd.W[x][15:0] = res2; * for RV32, x=0 * for RV64, x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_KSTSA16(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("kstsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.64. KSTSA16 ===== */ /* ===== Inline Function Start for 3.65. KSUB8 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB * \brief KSUB8 (SIMD 8-bit Signed Saturating Subtraction) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * KSUB8 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 8-bit signed elements saturating subtractions simultaneously. * * **Description**:\n * This instruction subtracts the 8-bit signed integer elements in Rs2 from the 8-bit * signed integer elements in Rs1. If any of the results are beyond the Q7 number range (-2^7 <= Q7 <= 27 * -1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd. 
* * **Operations**:\n * ~~~ * res[x] = Rs1.B[x] - Rs2.B[x]; * if (res[x] > (2^7)-1) { * res[x] = (2^7)-1; * OV = 1; * } else if (res[x] < -2^7) { * res[x] = -2^7; * OV = 1; * } * Rd.B[x] = res[x]; * for RV32: x=3...0, * for RV64: x=7...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_KSUB8(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("ksub8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.65. KSUB8 ===== */ /* ===== Inline Function Start for 3.66. KSUB16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB * \brief KSUB16 (SIMD 16-bit Signed Saturating Subtraction) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * KSUB16 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 16-bit signed integer elements saturating subtractions simultaneously. * * **Description**:\n * This instruction subtracts the 16-bit signed integer elements in Rs2 from the 16-bit * signed integer elements in Rs1. If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= * 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to * Rd. * * **Operations**:\n * ~~~ * res[x] = Rs1.H[x] - Rs2.H[x]; * if (res[x] > (2^15)-1) { * res[x] = (2^15)-1; * OV = 1; * } else if (res[x] < -2^15) { * res[x] = -2^15; * OV = 1; * } * Rd.H[x] = res[x]; * for RV32: x=1...0, * for RV64: x=3...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_KSUB16(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("ksub16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.66. 
KSUB16 ===== */ /* ===== Inline Function Start for 3.67. KSUB64 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB * \brief KSUB64 (64-bit Signed Saturating Subtraction) * \details * **Type**: DSP (64-bit Profile) * * **Syntax**:\n * ~~~ * KSUB64 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Perform a 64-bit signed integer subtraction. The result is saturated to the Q63 range. * * **RV32 Description**:\n * This instruction subtracts the 64-bit signed integer of an even/odd pair of * registers specified by Rs2(4,1) from the 64-bit signed integer of an even/odd pair of registers * specified by Rs1(4,1). If the 64-bit result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is * saturated to the range and the OV bit is set to 1. The saturated result is then written to an even/odd * pair of registers specified by Rd(4,1). * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair * includes register 2d and 2d+1. * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d` * register of the pair contains the low 32-bit of the operand. * * **RV64 Description**:\n * This instruction subtracts the 64-bit signed integer of Rs2 from the 64-bit signed * integer of Rs1. If the 64-bit result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated * to the range and the OV bit is set to 1. The saturated result is then written to Rd. 
* * **Operations**:\n * ~~~ * RV32: * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1); * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1); * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1); * result = R[a_H].R[a_L] - R[b_H].R[b_L]; * if (result > (2^63)-1) { * result = (2^63)-1; OV = 1; * } else if (result < -2^63) { * result = -2^63; OV = 1; * } * R[t_H].R[t_L] = result; * RV64: * result = Rs1 - Rs2; * if (result > (2^63)-1) { * result = (2^63)-1; OV = 1; * } else if (result < -2^63) { * result = -2^63; OV = 1; * } * Rd = result; * ~~~ * * \param [in] a long long type of value stored in a * \param [in] b long long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_KSUB64(long long a, long long b) { long long result; __ASM volatile("ksub64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.67. KSUB64 ===== */ /* ===== Inline Function Start for 3.68. KSUBH ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU * \brief KSUBH (Signed Subtraction with Q15 Saturation) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * KSUBH Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Subtract the signed lower 32-bit content of two registers with Q15 saturation. * * **Description**:\n * The signed lower 32-bit content of Rs2 is subtracted from the signed lower 32-bit * content of Rs1. And the result is saturated to the 16-bit signed integer range of [-2^15, 2^15-1] and then * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag. 
* * **Operations**:\n * ~~~ * tmp = Rs1.W[0] - Rs2.W[0]; * if (tmp > (2^15)-1) { * res = (2^15)-1; * OV = 1; * } else if (tmp < -2^15) { * res = -2^15; * OV = 1 * } else { * res = tmp; * } * Rd = SE(res[15:0]); * ~~~ * * \param [in] a int type of value stored in a * \param [in] b int type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KSUBH(int a, int b) { long result; __ASM volatile("ksubh %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.68. KSUBH ===== */ /* ===== Inline Function Start for 3.69. KSUBW ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU * \brief KSUBW (Signed Subtraction with Q31 Saturation) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * KSUBW Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Subtract the signed lower 32-bit content of two registers with Q31 saturation. * * **Description**:\n * The signed lower 32-bit content of Rs2 is subtracted from the signed lower 32-bit * content of Rs1. And the result is saturated to the 32-bit signed integer range of [-2^31, 2^31-1] and then * sign-extened and written to Rd. If saturation happens, this instruction sets the OV flag. * * **Operations**:\n * ~~~ * tmp = Rs1.W[0] - Rs2.W[0]; * if (tmp > (2^31)-1) { * res = (2^31)-1; * OV = 1; * } else if (tmp < -2^31) { * res = -2^31; * OV = 1 * } else { * res = tmp; * } * Rd = res[31:0]; // RV32 * Rd = SE(res[31:0]); // RV64 * ~~~ * * \param [in] a int type of value stored in a * \param [in] b int type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KSUBW(int a, int b) { long result; __ASM volatile("ksubw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.69. KSUBW ===== */ /* ===== Inline Function Start for 3.70.1. 
KWMMUL ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
 * \brief KWMMUL (SIMD Saturating MSW Signed Multiply Word & Double)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KWMMUL Rd, Rs1, Rs2
 * KWMMUL.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit integer elements of two registers, shift the results left 1-bit,
 * saturate, and write the most significant 32-bit results to a register. The `.u` form additionally
 * rounds up the multiplication results from the most significant discarded bit.
 *
 * **Description**:\n
 * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2. It then shifts
 * the multiplication results one bit to the left and takes the most significant 32-bit results. If the
 * shifted result is greater than 2^31-1, it is saturated to 2^31-1 and the OV flag is set to 1. The final element
 * result is written to Rd. The 32-bit elements of Rs1 and Rs2 are treated as signed integers. The `.u`
 * form of the instruction additionally rounds up the 64-bit multiplication results by adding a 1 to bit
 * 30 before the shift and saturation operations.
 *
 * **Operations**:\n
 * ~~~
 * if ((0x80000000 != Rs1.W[x]) | (0x80000000 != Rs2.W[x])) {
 *   Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
 *   if (`.u` form) {
 *     Round[x][33:0] = Mres[x][63:30] + 1;
 *     Rd.W[x] = Round[x][32:1];
 *   } else {
 *     Rd.W[x] = Mres[x][62:31];
 *   }
 * } else {
 *   Rd.W[x] = 0x7fffffff;
 *   OV = 1;
 * }
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KWMMUL(long a, long b)
{
    long rd;

    /* %0 = Rd (output), %1 = Rs1 (a), %2 = Rs2 (b) */
    __ASM volatile("kwmmul %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.70.1. KWMMUL ===== */

/* ===== Inline Function Start for 3.70.2.
KWMMUL.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
 * \brief KWMMUL.u (SIMD Saturating MSW Signed Multiply Word & Double with Rounding)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KWMMUL Rd, Rs1, Rs2
 * KWMMUL.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit integer elements of two registers, shift the results left 1-bit,
 * saturate, and write the most significant 32-bit results to a register. The `.u` form additionally
 * rounds up the multiplication results from the most significant discarded bit.
 *
 * **Description**:\n
 * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2. It then shifts
 * the multiplication results one bit to the left and takes the most significant 32-bit results. If the
 * shifted result is greater than 2^31-1, it is saturated to 2^31-1 and the OV flag is set to 1. The final element
 * result is written to Rd. The 32-bit elements of Rs1 and Rs2 are treated as signed integers. The `.u`
 * form of the instruction additionally rounds up the 64-bit multiplication results by adding a 1 to bit
 * 30 before the shift and saturation operations.
 *
 * **Operations**:\n
 * ~~~
 * if ((0x80000000 != Rs1.W[x]) | (0x80000000 != Rs2.W[x])) {
 *   Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
 *   if (`.u` form) {
 *     Round[x][33:0] = Mres[x][63:30] + 1;
 *     Rd.W[x] = Round[x][32:1];
 *   } else {
 *     Rd.W[x] = Mres[x][62:31];
 *   }
 * } else {
 *   Rd.W[x] = 0x7fffffff;
 *   OV = 1;
 * }
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KWMMUL_U(long a, long b)
{
    long rd;

    /* %0 = Rd (output), %1 = Rs1 (a), %2 = Rs2 (b) */
    __ASM volatile("kwmmul.u %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.70.2. KWMMUL.u ===== */

/* ===== Inline Function Start for 3.71.
MADDR32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
 * \brief MADDR32 (Multiply and Add to 32-Bit Word)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * MADDR32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the 32-bit contents of two registers and add the lower 32-bit multiplication result
 * to the 32-bit content of a destination register. Write the final result back to the destination register.
 *
 * **Description**:\n
 * This instruction multiplies the lower 32-bit content of Rs1 with that of Rs2. It adds the
 * lower 32-bit multiplication result to the lower 32-bit content of Rd and writes the final result (RV32)
 * or sign-extended result (RV64) back to Rd. The contents of Rs1 and Rs2 can be either signed or
 * unsigned integers.
 *
 * **Operations**:\n
 * ~~~
 * RV32:
 *   Mresult = Rs1 * Rs2;
 *   Rd = Rd + Mresult.W[0];
 * RV64:
 *   Mresult = Rs1.W[0] * Rs2.W[0];
 *   tres[31:0] = Rd.W[0] + Mresult.W[0];
 *   Rd = SE64(tres[31:0]);
 * ~~~
 *
 * \param [in]  t    unsigned long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_MADDR32(unsigned long t, unsigned long a, unsigned long b)
{
    /* %0 = Rd: "+r" makes t both the incoming accumulator and the result */
    __ASM volatile("maddr32 %0, %1, %2"
                   : "+r"(t)
                   : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for 3.71. MADDR32 ===== */

/* ===== Inline Function Start for 3.72. MAXW ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
 * \brief MAXW (32-bit Signed Word Maximum)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * MAXW Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Get the larger value from the 32-bit contents of two general registers.
 *
 * **Description**:\n
 * This instruction compares two signed 32-bit integers stored in Rs1 and Rs2, picks the
 * larger value as the result, and writes the result to Rd.
* * **Operations**:\n * ~~~ * if (Rs1.W[0] >= Rs2.W[0]) { * Rd = SE(Rs1.W[0]); * } else { * Rd = SE(Rs2.W[0]); * } * ~~~ * * \param [in] a int type of value stored in a * \param [in] b int type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_MAXW(int a, int b) { long result; __ASM volatile("maxw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.72. MAXW ===== */ /* ===== Inline Function Start for 3.73. MINW ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION * \brief MINW (32-bit Signed Word Minimum) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * MINW Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Get the smaller value from the 32-bit contents of two general registers. * * **Description**:\n * This instruction compares two signed 32-bit integers stored in Rs1 and Rs2, picks the * smaller value as the result, and writes the result to Rd. * * **Operations**:\n * ~~~ * if (Rs1.W[0] >= Rs2.W[0]) { Rd = SE(Rs2.W[0]); } else { Rd = SE(Rs1.W[0]); } * ~~~ * * \param [in] a int type of value stored in a * \param [in] b int type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_MINW(int a, int b) { long result; __ASM volatile("minw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.73. MINW ===== */ /* ===== Inline Function Start for 3.74. MSUBR32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC * \brief MSUBR32 (Multiply and Subtract from 32-Bit Word) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * MSUBR32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the 32-bit contents of two registers and subtract the lower 32-bit multiplication * result from the 32-bit content of a destination register. Write the final result back to the destination * register. 
* * **Description**:\n * This instruction multiplies the lower 32-bit content of Rs1 with that of Rs2, subtracts * the lower 32-bit multiplication result from the lower 32-bit content of Rd, then writes the final * result (RV32) or sign-extended result (RV64) back to Rd. The contents of Rs1 and Rs2 can be either * signed or unsigned integers. * * **Operations**:\n * ~~~ * RV32: * Mresult = Rs1 * Rs2; * Rd = Rd - Mresult.W[0]; * RV64: * Mresult = Rs1.W[0] * Rs2.W[0]; * tres[31:0] = Rd.W[0] - Mresult.W[0]; * Rd = SE64(tres[31:0]); * ~~~ * * \param [in] t unsigned long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_MSUBR32(unsigned long t, unsigned long a, unsigned long b) { __ASM volatile("msubr32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 3.74. MSUBR32 ===== */ /* ===== Inline Function Start for 3.75. MULR64 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION * \brief MULR64 (Multiply Word Unsigned to 64-bit Data) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * MULR64 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the 32-bit unsigned integer contents of two registers and write the 64-bit result. * * **RV32 Description**:\n * This instruction multiplies the 32-bit content of Rs1 with that of Rs2 and writes the 64-bit * multiplication result to an even/odd pair of registers containing Rd. Rd(4,1) index d determines the * even/odd pair group of the two registers. Specifically, the register pair includes register 2d and * 2d+1. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register * of the pair contains the low 32-bit of the result. * The lower 32-bit contents of Rs1 and Rs2 are treated as unsigned integers. 
* * **RV64 Description**:\n * This instruction multiplies the lower 32-bit content of Rs1 with that of Rs2 and writes the 64-bit * multiplication result to Rd. * The lower 32-bit contents of Rs1 and Rs2 are treated as unsigned integers. * * **Operations**:\n * ~~~ * RV32: * Mresult = CONCAT(1`b0,Rs1) u* CONCAT(1`b0,Rs2); * R[Rd(4,1).1(0)][31:0] = Mresult[63:32]; * R[Rd(4,1).0(0)][31:0] = Mresult[31:0]; * RV64: * Rd = Mresult[63:0]; * Mresult = CONCAT(1`b0,Rs1.W[0]) u* CONCAT(1`b0,Rs2.W[0]); * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_MULR64(unsigned long a, unsigned long b) { unsigned long long result; __ASM volatile("mulr64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.75. MULR64 ===== */ /* ===== Inline Function Start for 3.76. MULSR64 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION * \brief MULSR64 (Multiply Word Signed to 64-bit Data) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * MULSR64 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the 32-bit signed integer contents of two registers and write the 64-bit result. * * **RV32 Description**:\n * This instruction multiplies the lower 32-bit content of Rs1 with the lower 32-bit content of Rs2 and * writes the 64-bit multiplication result to an even/odd pair of registers containing Rd. Rd(4,1) index d * determines the even/odd pair group of the two registers. Specifically, the register pair includes * register 2d and 2d+1. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register * of the pair contains the low 32-bit of the result. * The lower 32-bit contents of Rs1 and Rs2 are treated as signed integers. 
* * **RV64 Description**:\n * This instruction multiplies the lower 32-bit content of Rs1 with the lower 32-bit content of Rs2 and * writes the 64-bit multiplication result to Rd. * The lower 32-bit contents of Rs1 and Rs2 are treated as signed integers. * * **Operations**:\n * ~~~ * RV32: * Mresult = Ra s* Rb; * R[Rd(4,1).1(0)][31:0] = Mresult[63:32]; * R[Rd(4,1).0(0)][31:0] = Mresult[31:0]; * RV64: * Mresult = Ra.W[0] s* Rb.W[0]; * Rd = Mresult[63:0]; * ~~~ * * \param [in] a long type of value stored in a * \param [in] b long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_MULSR64(long a, long b) { long long result; __ASM volatile("mulsr64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.76. MULSR64 ===== */ /* ===== Inline Function Start for 3.77. PBSAD ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC * \brief PBSAD (Parallel Byte Sum of Absolute Difference) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * PBSAD Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Calculate the sum of absolute difference of unsigned 8-bit data elements. * * **Description**:\n * This instruction subtracts the un-signed 8-bit elements of Rs2 from those of Rs1. Then * it adds the absolute value of each difference together and writes the result to Rd. * * **Operations**:\n * ~~~ * absdiff[x] = ABS(Rs1.B[x] - Rs2.B[x]); * Rd = SUM(absdiff[x]); * for RV32: x=3...0, * for RV64: x=7...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_PBSAD(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("pbsad %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.77. PBSAD ===== */ /* ===== Inline Function Start for 3.78. 
PBSADA ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC * \brief PBSADA (Parallel Byte Sum of Absolute Difference Accum) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * PBSADA Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Calculate the sum of absolute difference of four unsigned 8-bit data elements and * accumulate it into a register. * * **Description**:\n * This instruction subtracts the un-signed 8-bit elements of Rs2 from those of Rs1. It * then adds the absolute value of each difference together along with the content of Rd and writes the * accumulated result back to Rd. * * **Operations**:\n * ~~~ * absdiff[x] = ABS(Rs1.B[x] - Rs2.B[x]); * Rd = Rd + SUM(absdiff[x]); * for RV32: x=3...0, * for RV64: x=7...0 * ~~~ * * \param [in] t unsigned long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_PBSADA(unsigned long t, unsigned long a, unsigned long b) { __ASM volatile("pbsada %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 3.78. PBSADA ===== */ /* ===== Inline Function Start for 3.79.1. PKBB16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_PACK * \brief PKBB16 (Pack Two 16-bit Data from Both Bottom Half) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * PKBB16 Rd, Rs1, Rs2 * PKBT16 Rd, Rs1, Rs2 * PKTT16 Rd, Rs1, Rs2 * PKTB16 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Pack 16-bit data from 32-bit chunks in two registers. * * PKBB16: bottom.bottom * * PKBT16 bottom.top * * PKTT16 top.top * * PKTB16 top.bottom * * **Description**:\n * (PKBB16) moves Rs1.W[x][15:0] to Rd.W[x][31:16] and moves Rs2.W[x] [15:0] to * Rd.W[x] [15:0]. * (PKBT16) moves Rs1.W[x] [15:0] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0]. * (PKTT16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0]. 
* (PKTB16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0]. * * **Operations**:\n * ~~~ * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][15:0]); // PKBB16 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][31:16]); // PKBT16 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][15:0]); // PKTB16 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][31:16]); // PKTT16 * for RV32: x=0, * for RV64: x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_PKBB16(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("pkbb16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.79.1. PKBB16 ===== */ /* ===== Inline Function Start for 3.79.2. PKBT16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_PACK * \brief PKBT16 (Pack Two 16-bit Data from Bottom and Top Half) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * PKBB16 Rd, Rs1, Rs2 * PKBT16 Rd, Rs1, Rs2 * PKTT16 Rd, Rs1, Rs2 * PKTB16 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Pack 16-bit data from 32-bit chunks in two registers. * * PKBB16: bottom.bottom * * PKBT16 bottom.top * * PKTT16 top.top * * PKTB16 top.bottom * * **Description**:\n * (PKBB16) moves Rs1.W[x][15:0] to Rd.W[x][31:16] and moves Rs2.W[x] [15:0] to * Rd.W[x] [15:0]. * (PKBT16) moves Rs1.W[x] [15:0] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0]. * (PKTT16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0]. * (PKTB16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0]. 
* * **Operations**:\n * ~~~ * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][15:0]); // PKBB16 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][31:16]); // PKBT16 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][15:0]); // PKTB16 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][31:16]); // PKTT16 * for RV32: x=0, * for RV64: x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_PKBT16(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("pkbt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.79.2. PKBT16 ===== */ /* ===== Inline Function Start for 3.79.3. PKTT16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_PACK * \brief PKTT16 (Pack Two 16-bit Data from Both Top Half) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * PKBB16 Rd, Rs1, Rs2 * PKBT16 Rd, Rs1, Rs2 * PKTT16 Rd, Rs1, Rs2 * PKTB16 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Pack 16-bit data from 32-bit chunks in two registers. * * PKBB16: bottom.bottom * * PKBT16 bottom.top * * PKTT16 top.top * * PKTB16 top.bottom * * **Description**:\n * (PKBB16) moves Rs1.W[x][15:0] to Rd.W[x][31:16] and moves Rs2.W[x] [15:0] to * Rd.W[x] [15:0]. * (PKBT16) moves Rs1.W[x] [15:0] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0]. * (PKTT16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0]. * (PKTB16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0]. 
* * **Operations**:\n * ~~~ * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][15:0]); // PKBB16 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][31:16]); // PKBT16 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][15:0]); // PKTB16 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][31:16]); // PKTT16 * for RV32: x=0, * for RV64: x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_PKTT16(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("pktt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.79.3. PKTT16 ===== */ /* ===== Inline Function Start for 3.79.4. PKTB16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_PACK * \brief PKTB16 (Pack Two 16-bit Data from Top and Bottom Half) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * PKBB16 Rd, Rs1, Rs2 * PKBT16 Rd, Rs1, Rs2 * PKTT16 Rd, Rs1, Rs2 * PKTB16 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Pack 16-bit data from 32-bit chunks in two registers. * * PKBB16: bottom.bottom * * PKBT16 bottom.top * * PKTT16 top.top * * PKTB16 top.bottom * * **Description**:\n * (PKBB16) moves Rs1.W[x][15:0] to Rd.W[x][31:16] and moves Rs2.W[x] [15:0] to * Rd.W[x] [15:0]. * (PKBT16) moves Rs1.W[x] [15:0] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0]. * (PKTT16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0]. * (PKTB16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0]. 
* * **Operations**:\n * ~~~ * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][15:0]); // PKBB16 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][31:16]); // PKBT16 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][15:0]); // PKTB16 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][31:16]); // PKTT16 * for RV32: x=0, * for RV64: x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_PKTB16(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("pktb16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.79.4. PKTB16 ===== */ /* ===== Inline Function Start for 3.80. RADD8 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB * \brief RADD8 (SIMD 8-bit Signed Halving Addition) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * RADD8 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 8-bit signed integer element additions simultaneously. The element results are halved * to avoid overflow or saturation. * * **Description**:\n * This instruction adds the 8-bit signed integer elements in Rs1 with the 8-bit signed * integer elements in Rs2. The results are first arithmetically right-shifted by 1 bit and then written to * Rd. 
* * **Examples**:\n * ~~~ * * Rs1 = 0x7F, Rs2 = 0x7F, Rd = 0x7F * * Rs1 = 0x80, Rs2 = 0x80, Rd = 0x80 * * Rs1 = 0x40, Rs2 = 0x80, Rd = 0xE0 * ~~~ * * **Operations**:\n * ~~~ * Rd.B[x] = (Rs1.B[x] + Rs2.B[x]) s>> 1; for RV32: x=3...0, for RV64: x=7...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_RADD8(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("radd8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.80. RADD8 ===== */ /* ===== Inline Function Start for 3.81. RADD16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB * \brief RADD16 (SIMD 16-bit Signed Halving Addition) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * RADD16 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 16-bit signed integer element additions simultaneously. The results are halved to avoid * overflow or saturation. * * **Description**:\n * This instruction adds the 16-bit signed integer elements in Rs1 with the 16-bit signed * integer elements in Rs2. The results are first arithmetically right-shifted by 1 bit and then written to * Rd. * * **Examples**:\n * ~~~ * * Rs1 = 0x7FFF, Rs2 = 0x7FFF, Rd = 0x7FFF * * Rs1 = 0x8000, Rs2 = 0x8000, Rd = 0x8000 * * Rs1 = 0x4000, Rs2 = 0x8000, Rd = 0xE000 * ~~~ * * **Operations**:\n * ~~~ * Rd.H[x] = (Rs1.H[x] + Rs2.H[x]) s>> 1; for RV32: x=1...0, for RV64: x=3...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_RADD16(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("radd16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.81. RADD16 ===== */ /* ===== Inline Function Start for 3.82. 
RADD64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
 * \brief RADD64 (64-bit Signed Halving Addition)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * RADD64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Add two 64-bit signed integers. The result is halved to avoid overflow or saturation.
 *
 * **RV32 Description**:\n
 * This instruction adds the 64-bit signed integer of an even/odd pair of registers
 * specified by Rs1(4,1) with the 64-bit signed integer of an even/odd pair of registers specified
 * by Rs2(4,1). The 64-bit addition result is first arithmetically right-shifted by 1 bit and then
 * written to an even/odd pair of registers specified by Rd(4,1).
 * Rx(4,1), i.e., value d, determines the even/odd pair group of two registers. Specifically, the
 * register pair includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d`
 * register of the pair contains the low 32-bit of the result.
 *
 * **RV64 Description**:\n
 * This instruction adds the 64-bit signed integer in Rs1 with the 64-bit signed
 * integer in Rs2. The 64-bit addition result is first arithmetically right-shifted by 1 bit and
 * then written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * RV32:
 *   t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 *   a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
 *   b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
 *   R[t_H].R[t_L] = (R[a_H].R[a_L] + R[b_H].R[b_L]) s>> 1;
 * RV64:
 *   Rd = (Rs1 + Rs2) s>> 1;
 * ~~~
 *
 * \param [in]  a    first addend (Rs1), long long type
 * \param [in]  b    second addend (Rs2), long long type
 * \return halved sum (Rd), long long type
 */
__STATIC_FORCEINLINE long long __RV_RADD64(long long a, long long b)
{
    long long halved;
    __ASM volatile("radd64 %0, %1, %2"
                   : "=r"(halved)
                   : "r"(a), "r"(b));
    return halved;
}
/* ===== Inline Function End for 3.82. RADD64 ===== */

/* ===== Inline Function Start for 3.83.
RADDW ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION * \brief RADDW (32-bit Signed Halving Addition) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * RADDW Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Add 32-bit signed integers and the results are halved to avoid overflow or saturation. * * **Description**:\n * This instruction adds the first 32-bit signed integer in Rs1 with the first 32-bit signed * integer in Rs2. The result is first arithmetically right-shifted by 1 bit and then sign-extended and * written to Rd. * * **Examples**:\n * ~~~ * * Rs1 = 0x7FFFFFFF, Rs2 = 0x7FFFFFFF, Rd = 0x7FFFFFFF * * Rs1 = 0x80000000, Rs2 = 0x80000000, Rd = 0x80000000 * * Rs1 = 0x40000000, Rs2 = 0x80000000, Rd = 0xE0000000 * ~~~ * * **Operations**:\n * ~~~ * RV32: * Rd[31:0] = (Rs1[31:0] + Rs2[31:0]) s>> 1; * RV64: * resw[31:0] = (Rs1[31:0] + Rs2[31:0]) s>> 1; * Rd[63:0] = SE(resw[31:0]); * ~~~ * * \param [in] a int type of value stored in a * \param [in] b int type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_RADDW(int a, int b) { long result; __ASM volatile("raddw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.83. RADDW ===== */ /* ===== Inline Function Start for 3.84. RCRAS16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB * \brief RCRAS16 (SIMD 16-bit Signed Halving Cross Addition & Subtraction) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * RCRAS16 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 16-bit signed integer element addition and 16-bit signed integer element subtraction in * a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit chunks. The results * are halved to avoid overflow or saturation. 
* * **Description**:\n * This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in * Rs1 with the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs2, and subtracts the 16-bit * signed integer element in [31:16] of 32-bit chunks in Rs2 from the 16-bit signed integer element in * [15:0] of 32-bit chunks in Rs1. The element results are first arithmetically right-shifted by 1 bit and * then written to [31:16] of 32-bit chunks in Rd and [15:0] of 32-bit chunks in Rd. * * **Examples**:\n * ~~~ * Please see `RADD16` and `RSUB16` instructions. * ~~~ * * **Operations**:\n * ~~~ * Rd.W[x][31:16] = (Rs1.W[x][31:16] + Rs2.W[x][15:0]) s>> 1; * Rd.W[x][15:0] = (Rs1.W[x][15:0] - Rs2.W[x][31:16]) s>> 1; * for RV32, x=0 * for RV64, x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_RCRAS16(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("rcras16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.84. RCRAS16 ===== */ /* ===== Inline Function Start for 3.85. RCRSA16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB * \brief RCRSA16 (SIMD 16-bit Signed Halving Cross Subtraction & Addition) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * RCRSA16 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 16-bit signed integer element subtraction and 16-bit signed integer element addition in * a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit chunks. The results * are halved to avoid overflow or saturation. 
* * **Description**:\n * This instruction subtracts the 16-bit signed integer element in [15:0] of 32-bit chunks * in Rs2 from the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1, and adds the 16-bit * signed element integer in [15:0] of 32-bit chunks in Rs1 with the 16-bit signed integer element in * [31:16] of 32-bit chunks in Rs2. The two results are first arithmetically right-shifted by 1 bit and * then written to [31:16] of 32-bit chunks in Rd and [15:0] of 32-bit chunks in Rd. * * **Examples**:\n * ~~~ * Please see `RADD16` and `RSUB16` instructions. * ~~~ * * **Operations**:\n * ~~~ * Rd.W[x][31:16] = (Rs1.W[x][31:16] - Rs2.W[x][15:0]) s>> 1; * Rd.W[x][15:0] = (Rs1.W[x][15:0] + Rs2.W[x][31:16]) s>> 1; * for RV32, x=0 * for RV64, x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_RCRSA16(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("rcrsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.85. RCRSA16 ===== */ /* ===== Inline Function Start for 3.86. RDOV ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_OV_FLAG_SC * \brief RDOV (Read OV flag) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * RDOV Rd # pseudo mnemonic * ~~~ * * **Purpose**:\n * This pseudo instruction is an alias to `CSRR Rd, ucode` instruction which maps to the real * instruction of `CSRRS Rd, ucode, x0`. * * * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_RDOV(void) { unsigned long result; __ASM volatile("rdov %0" : "=r"(result)); return result; } /* ===== Inline Function End for 3.86. RDOV ===== */ /* ===== Inline Function Start for 3.87. 
RSTAS16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief RSTAS16 (SIMD 16-bit Signed Halving Straight Addition & Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * RSTAS16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element addition and 16-bit signed integer element subtraction in
 * a 32-bit chunk simultaneously. Operands are from corresponding positions in 32-bit chunks. The
 * results are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in
 * Rs1 with the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs2, and subtracts the
 * 16-bit signed integer element in [15:0] of 32-bit chunks in Rs2 from the 16-bit signed integer
 * element in [15:0] of 32-bit chunks in Rs1. The element results are first arithmetically
 * right-shifted by 1 bit and then written to [31:16] of 32-bit chunks in Rd and [15:0] of 32-bit
 * chunks in Rd.
 *
 * **Examples**:\n
 * ~~~
 * Please see `RADD16` and `RSUB16` instructions.
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:16] = (Rs1.W[x][31:16] + Rs2.W[x][31:16]) s>> 1;
 * Rd.W[x][15:0] = (Rs1.W[x][15:0] - Rs2.W[x][15:0]) s>> 1;
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    packed signed halfwords (Rs1), unsigned long type
 * \param [in]  b    packed signed halfwords (Rs2), unsigned long type
 * \return content of Rd, unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_RSTAS16(unsigned long a, unsigned long b)
{
    unsigned long rd;
    __ASM volatile("rstas16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.87. RSTAS16 ===== */

/* ===== Inline Function Start for 3.88.
RSTSA16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief RSTSA16 (SIMD 16-bit Signed Halving Straight Subtraction & Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * RSTSA16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element subtraction and 16-bit signed integer element addition in
 * a 32-bit chunk simultaneously. Operands are from corresponding positions in 32-bit chunks. The
 * results are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit signed integer element in [31:16] of 32-bit chunks
 * in Rs2 from the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1, and adds the
 * 16-bit signed integer element in [15:0] of 32-bit chunks in Rs1 with the 16-bit signed integer
 * element in [15:0] of 32-bit chunks in Rs2. The two results are first arithmetically
 * right-shifted by 1 bit and then written to [31:16] of 32-bit chunks in Rd and [15:0] of 32-bit
 * chunks in Rd.
 *
 * **Examples**:\n
 * ~~~
 * Please see `RADD16` and `RSUB16` instructions.
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:16] = (Rs1.W[x][31:16] - Rs2.W[x][31:16]) s>> 1;
 * Rd.W[x][15:0] = (Rs1.W[x][15:0] + Rs2.W[x][15:0]) s>> 1;
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    packed signed halfwords (Rs1), unsigned long type
 * \param [in]  b    packed signed halfwords (Rs2), unsigned long type
 * \return content of Rd, unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_RSTSA16(unsigned long a, unsigned long b)
{
    unsigned long rd;
    __ASM volatile("rstsa16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.88. RSTSA16 ===== */

/* ===== Inline Function Start for 3.89.
RSUB8 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB * \brief RSUB8 (SIMD 8-bit Signed Halving Subtraction) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * RSUB8 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 8-bit signed integer element subtractions simultaneously. The results are halved to * avoid overflow or saturation. * * **Description**:\n * This instruction subtracts the 8-bit signed integer elements in Rs2 from the 8-bit * signed integer elements in Rs1. The results are first arithmetically right-shifted by 1 bit and then * written to Rd. * * **Examples**:\n * ~~~ * * Rs1 = 0x7F, Rs2 = 0x80, Rd = 0x7F * * Rs1 = 0x80, Rs2 = 0x7F, Rd = 0x80 * * Rs1= 0x80, Rs2 = 0x40, Rd = 0xA0 * ~~~ * * **Operations**:\n * ~~~ * Rd.B[x] = (Rs1.B[x] - Rs2.B[x]) s>> 1; * for RV32: x=3...0, * for RV64: x=7...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_RSUB8(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("rsub8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.89. RSUB8 ===== */ /* ===== Inline Function Start for 3.90. RSUB16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB * \brief RSUB16 (SIMD 16-bit Signed Halving Subtraction) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * RSUB16 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 16-bit signed integer element subtractions simultaneously. The results are halved to * avoid overflow or saturation. * * **Description**:\n * This instruction subtracts the 16-bit signed integer elements in Rs2 from the 16-bit * signed integer elements in Rs1. The results are first arithmetically right-shifted by 1 bit and then * written to Rd. 
* * **Examples**:\n * ~~~ * * Ra = 0x7FFF, Rb = 0x8000, Rt = 0x7FFF * * Ra = 0x8000, Rb = 0x7FFF, Rt = 0x8000 * * Ra = 0x8000, Rb = 0x4000, Rt = 0xA000 * ~~~ * * **Operations**:\n * ~~~ * Rd.H[x] = (Rs1.H[x] - Rs2.H[x]) s>> 1; * for RV32: x=1...0, * for RV64: x=3...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_RSUB16(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("rsub16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.90. RSUB16 ===== */ /* ===== Inline Function Start for 3.91. RSUB64 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB * \brief RSUB64 (64-bit Signed Halving Subtraction) * \details * **Type**: DSP (64-bit Profile) * * **Syntax**:\n * ~~~ * RSUB64 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Perform a 64-bit signed integer subtraction. The result is halved to avoid overflow or * saturation. * * **RV32 Description**:\n * This instruction subtracts the 64-bit signed integer of an even/odd pair of * registers specified by Rb(4,1) from the 64-bit signed integer of an even/odd pair of registers * specified by Ra(4,1). The subtraction result is first arithmetically right-shifted by 1 bit and then * written to an even/odd pair of registers specified by Rt(4,1). * Rx(4,1), i.e., value d, determines the even/odd pair group of two registers. Specifically, the register * pair includes register 2d and 2d+1. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register * of the pair contains the low 32-bit of the result. * * **RV64 Description**:\n * This instruction subtracts the 64-bit signed integer in Rs2 from the 64-bit signed * integer in Rs1. The 64-bit subtraction result is first arithmetically right-shifted by 1 bit and then * written to Rd. 
* * **Operations**:\n * ~~~ * RV32: * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1); * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1); * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1); * R[t_H].R[t_L] = (R[a_H].R[a_L] - R[b_H].R[b_L]) s>> 1; * RV64: * Rd = (Rs1 - Rs2) s>> 1; * ~~~ * * \param [in] a long long type of value stored in a * \param [in] b long long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_RSUB64(long long a, long long b) { long long result; __ASM volatile("rsub64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.91. RSUB64 ===== */ /* ===== Inline Function Start for 3.92. RSUBW ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION * \brief RSUBW (32-bit Signed Halving Subtraction) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * RSUBW Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Subtract 32-bit signed integers and the result is halved to avoid overflow or saturation. * * **Description**:\n * This instruction subtracts the first 32-bit signed integer in Rs2 from the first 32-bit * signed integer in Rs1. The result is first arithmetically right-shifted by 1 bit and then sign-extended * and written to Rd. * * **Examples**:\n * ~~~ * * Rs1 = 0x7FFFFFFF, Rs2 = 0x80000000, Rd = 0x7FFFFFFF * * Rs1 = 0x80000000, Rs2 = 0x7FFFFFFF, Rd = 0x80000000 * * Rs1 = 0x80000000, Rs2 = 0x40000000, Rd = 0xA0000000 * ~~~ * * **Operations**:\n * ~~~ * RV32: * Rd[31:0] = (Rs1[31:0] - Rs2[31:0]) s>> 1; * RV64: * resw[31:0] = (Rs1[31:0] - Rs2[31:0]) s>> 1; * Rd[63:0] = SE(resw[31:0]); * ~~~ * * \param [in] a int type of value stored in a * \param [in] b int type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_RSUBW(int a, int b) { long result; __ASM volatile("rsubw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.92. 
RSUBW ===== */ /* ===== Inline Function Start for 3.93. SCLIP8 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC * \brief SCLIP8 (SIMD 8-bit Signed Clip Value) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * SCLIP8 Rd, Rs1, imm3u[2:0] * ~~~ * * **Purpose**:\n * Limit the 8-bit signed integer elements of a register into a signed range simultaneously. * * **Description**:\n * This instruction limits the 8-bit signed integer elements stored in Rs1 into a signed * integer range between 2^imm3u-1 and -2^imm3u, and writes the limited results to Rd. For example, if * imm3u is 3, the 8-bit input values should be saturated between 7 and -8. If saturation is performed, * set OV bit to 1. * * **Operations**:\n * ~~~ * src = Rs1.B[x]; * if (src > (2^imm3u)-1) { * src = (2^imm3u)-1; * OV = 1; * } else if (src < -2^imm3u) { * src = -2^imm3u; * OV = 1; * } * Rd.B[x] = src * for RV32: x=3...0, * for RV64: x=7...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long type */ #define __RV_SCLIP8(a, b) \ ({ \ unsigned long result; \ unsigned long __a = (unsigned long)(a); \ __ASM volatile("sclip8 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ result; \ }) /* ===== Inline Function End for 3.93. SCLIP8 ===== */ /* ===== Inline Function Start for 3.94. SCLIP16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC * \brief SCLIP16 (SIMD 16-bit Signed Clip Value) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * SCLIP16 Rd, Rs1, imm4u[3:0] * ~~~ * * **Purpose**:\n * Limit the 16-bit signed integer elements of a register into a signed range simultaneously. * * **Description**:\n * This instruction limits the 16-bit signed integer elements stored in Rs1 into a signed * integer range between 2imm4u-1 and -2imm4u, and writes the limited results to Rd. For example, if * imm4u is 3, the 16-bit input values should be saturated between 7 and -8. 
If saturation is performed, * set OV bit to 1. * * **Operations**:\n * ~~~ * src = Rs1.H[x]; * if (src > (2^imm4u)-1) { * src = (2^imm4u)-1; * OV = 1; * } else if (src < -2^imm4u) { * src = -2^imm4u; * OV = 1; * } * Rd.H[x] = src * for RV32: x=1...0, * for RV64: x=3...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long type */ #define __RV_SCLIP16(a, b) \ ({ \ unsigned long result; \ unsigned long __a = (unsigned long)(a); \ __ASM volatile("sclip16 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ result; \ }) /* ===== Inline Function End for 3.94. SCLIP16 ===== */ /* ===== Inline Function Start for 3.95. SCLIP32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC * \brief SCLIP32 (SIMD 32-bit Signed Clip Value) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * SCLIP32 Rd, Rs1, imm5u[4:0] * ~~~ * * **Purpose**:\n * Limit the 32-bit signed integer elements of a register into a signed range simultaneously. * * **Description**:\n * This instruction limits the 32-bit signed integer elements stored in Rs1 into a signed * integer range between 2imm5u-1 and -2imm5u, and writes the limited results to Rd. For example, if * imm5u is 3, the 32-bit input values should be saturated between 7 and -8. If saturation is performed, * set OV bit to 1. * * **Operations**:\n * ~~~ * src = Rs1.W[x]; * if (src > (2^imm5u)-1) { * src = (2^imm5u)-1; * OV = 1; * } else if (src < -2^imm5u) { * src = -2^imm5u; * OV = 1; * } * Rd.W[x] = src * for RV32: x=0, * for RV64: x=1...0 * ~~~ * * \param [in] a long type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in long type */ #define __RV_SCLIP32(a, b) \ ({ \ long result; \ long __a = (long)(a); \ __ASM volatile("sclip32 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ result; \ }) /* ===== Inline Function End for 3.95. 
SCLIP32 ===== */

/* ===== Inline Function Start for 3.96. SCMPLE8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP
 * \brief SCMPLE8 (SIMD 8-bit Signed Compare Less Than & Equal)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SCMPLE8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit signed integer elements less than & equal comparisons simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 8-bit signed integer elements in Rs1 with the 8-bit
 * signed integer elements in Rs2 to see if the one in Rs1 is less than or equal to the one in Rs2. If it is
 * true, the result is 0xFF; otherwise, the result is 0x0. The element comparison results are written to
 * Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.B[x] = (Rs1.B[x] {le} Rs2.B[x])? 0xff : 0x0;
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SCMPLE8(unsigned long a, unsigned long b)
{
    unsigned long mask;

    /* %0 receives the per-byte comparison mask; a and b are the two source operands. */
    __ASM volatile("scmple8 %0, %1, %2"
                   : "=r"(mask)
                   : "r"(a), "r"(b));
    return mask;
}
/* ===== Inline Function End for 3.96. SCMPLE8 ===== */

/* ===== Inline Function Start for 3.97. SCMPLE16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP
 * \brief SCMPLE16 (SIMD 16-bit Signed Compare Less Than & Equal)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SCMPLE16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer elements less than & equal comparisons simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 16-bit signed integer elements in Rs1 with the 16-bit
 * signed integer elements in Rs2 to see if the one in Rs1 is less than or equal to the one in Rs2. If it is
 * true, the result is 0xFFFF; otherwise, the result is 0x0. The element comparison results are written
 * to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = (Rs1.H[x] {le} Rs2.H[x])?
0xffff : 0x0; * for RV32: x=1...0, * for RV64: x=3...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_SCMPLE16(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("scmple16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.97. SCMPLE16 ===== */ /* ===== Inline Function Start for 3.98. SCMPLT8 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP * \brief SCMPLT8 (SIMD 8-bit Signed Compare Less Than) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * SCMPLT8 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 8-bit signed integer elements less than comparisons simultaneously. * * **Description**:\n * This instruction compares the 8-bit signed integer elements in Rs1 with the 8-bit * signed integer elements in Rs2 to see if the one in Rs1 is less than the one in Rs2. If it is true, the * result is 0xFF; otherwise, the result is 0x0. The element comparison results are written to Rd. * * **Operations**:\n * ~~~ * Rd.B[x] = (Rs1.B[x] < Rs2.B[x])? 0xff : 0x0; * for RV32: x=3...0, * for RV64: x=7...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_SCMPLT8(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("scmplt8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.98. SCMPLT8 ===== */ /* ===== Inline Function Start for 3.99. 
SCMPLT16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP
 * \brief SCMPLT16 (SIMD 16-bit Signed Compare Less Than)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SCMPLT16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer elements less than comparisons simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 16-bit signed integer elements in Rs1 with the 16-bit
 * signed integer elements in Rs2 to see if the one in Rs1 is less than the one in Rs2. If it is true, the
 * result is 0xFFFF; otherwise, the result is 0x0. The element comparison results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = (Rs1.H[x] < Rs2.H[x])? 0xffff : 0x0;
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SCMPLT16(unsigned long a, unsigned long b)
{
    unsigned long mask;

    /* %0 receives the per-halfword comparison mask; a and b are the two source operands. */
    __ASM volatile("scmplt16 %0, %1, %2"
                   : "=r"(mask)
                   : "r"(a), "r"(b));
    return mask;
}
/* ===== Inline Function End for 3.99. SCMPLT16 ===== */

/* ===== Inline Function Start for 3.100. SLL8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
 * \brief SLL8 (SIMD 8-bit Shift Left Logical)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SLL8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit elements logical left shift operations simultaneously. The shift amount is a
 * variable from a GPR.
 *
 * **Description**:\n
 * The 8-bit elements in Rs1 are left-shifted logically. And the results are written to Rd.
 * The shifted out bits are filled with zero and the shift amount is specified by the low-order 3-bits of
 * the value in the Rs2 register.
* * **Operations**:\n * ~~~ * sa = Rs2[2:0]; * Rd.B[x] = Rs1.B[x] << sa; * for RV32: x=3...0, * for RV64: x=7...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_SLL8(unsigned long a, unsigned int b) { unsigned long result; __ASM volatile("sll8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.100. SLL8 ===== */ /* ===== Inline Function Start for 3.101. SLLI8 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT * \brief SLLI8 (SIMD 8-bit Shift Left Logical Immediate) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * SLLI8 Rd, Rs1, imm3u * ~~~ * * **Purpose**:\n * Do 8-bit elements logical left shift operations simultaneously. The shift amount is an * immediate value. * * **Description**:\n * The 8-bit elements in Rs1 are left-shifted logically. And the results are written to Rd. * The shifted out bits are filled with zero and the shift amount is specified by the imm3u constant. * * **Operations**:\n * ~~~ * sa = imm3u[2:0]; * Rd.B[x] = Rs1.B[x] << sa; * for RV32: x=3...0, * for RV64: x=7...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long type */ #define __RV_SLLI8(a, b) \ ({ \ unsigned long result; \ unsigned long __a = (unsigned long)(a); \ __ASM volatile("slli8 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ result; \ }) /* ===== Inline Function End for 3.101. SLLI8 ===== */ /* ===== Inline Function Start for 3.102. SLL16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT * \brief SLL16 (SIMD 16-bit Shift Left Logical) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * SLL16 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 16-bit elements logical left shift operations simultaneously. 
The shift amount is a * variable from a GPR. * * **Description**:\n * The 16-bit elements in Rs1 are left-shifted logically. And the results are written to Rd. * The shifted out bits are filled with zero and the shift amount is specified by the low-order 4-bits of * the value in the Rs2 register. * * **Operations**:\n * ~~~ * sa = Rs2[3:0]; * Rd.H[x] = Rs1.H[x] << sa; * for RV32: x=1...0, * for RV64: x=3...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_SLL16(unsigned long a, unsigned int b) { unsigned long result; __ASM volatile("sll16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.102. SLL16 ===== */ /* ===== Inline Function Start for 3.103. SLLI16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT * \brief SLLI16 (SIMD 16-bit Shift Left Logical Immediate) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * SLLI16 Rd, Rs1, imm4[3:0] * ~~~ * * **Purpose**:\n * Do 16-bit element logical left shift operations simultaneously. The shift amount is an * immediate value. * * **Description**:\n * The 16-bit elements in Rs1 are left-shifted logically. The shifted out bits are filled with * zero and the shift amount is specified by the imm4[3:0] constant. And the results are written to Rd. * * **Operations**:\n * ~~~ * sa = imm4[3:0]; * Rd.H[x] = Rs1.H[x] << sa; * for RV32: x=1...0, * for RV64: x=3...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long type */ #define __RV_SLLI16(a, b) \ ({ \ unsigned long result; \ unsigned long __a = (unsigned long)(a); \ __ASM volatile("slli16 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ result; \ }) /* ===== Inline Function End for 3.103. SLLI16 ===== */ /* ===== Inline Function Start for 3.104. 
SMAL ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB * \brief SMAL (Signed Multiply Halfs & Add 64-bit) * \details * **Type**: Partial-SIMD * * **Syntax**:\n * ~~~ * SMAL Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the signed bottom 16-bit content of the 32-bit elements of a register with the top * 16-bit content of the same 32-bit elements of the same register, and add the results with a 64-bit * value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is written back * to another even/odd pair of registers (RV32) or a register (RV64). * * **RV32 Description**:\n * This instruction multiplies the bottom 16-bit content of the lower 32-bit of Rs2 with the top 16-bit * content of the lower 32-bit of Rs2 and adds the result with the 64-bit value of an even/odd pair of * registers specified by Rs1(4,1). The 64-bit addition result is written back to an even/odd pair of * registers specified by Rd(4,1). The 16-bit values of Rs2, and the 64-bit value of the Rs1(4,1) register- * pair are treated as signed integers. * Rx(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair * includes register 2d and 2d+1. * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d` * register of the pair contains the low 32-bit of the operand. * * **RV64 Description**:\n * This instruction multiplies the bottom 16-bit content of the 32-bit elements of Rs2 with the top 16-bit * content of the same 32-bit elements of Rs2 and adds the results with the 64-bit value of Rs1. The 64- * bit addition result is written back to Rd. The 16-bit values of Rs2, and the 64-bit value of Rs1 are * treated as signed integers. 
* * **Operations**:\n * ~~~ * RV32: * Mres[31:0] = Rs2.H[1] * Rs2.H[0]; * Idx0 = CONCAT(Rs1(4,1),1'b0); Idx1 = CONCAT(Rs1(4,1),1'b1); + * Idx2 = CONCAT(Rd(4,1),1'b0); Idx3 = CONCAT(Rd(4,1),1'b1); * R[Idx3].R[Idx2] = R[Idx1].R[Idx0] + SE64(Mres[31:0]); * RV64: * Mres[0][31:0] = Rs2.W[0].H[1] * Rs2.W[0].H[0]; * Mres[1][31:0] = Rs2.W[1].H[1] * Rs2.W[1].H[0]; * Rd = Rs1 + SE64(Mres[1][31:0]) + SE64(Mres[0][31:0]); * ~~~ * * \param [in] a long long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_SMAL(long long a, unsigned long b) { long long result; __ASM volatile("smal %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.104. SMAL ===== */ /* ===== Inline Function Start for 3.105.1. SMALBB ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB * \brief SMALBB (Signed Multiply Bottom Halfs & Add 64-bit) * \details * **Type**: DSP (64-bit Profile) * * **Syntax**:\n * ~~~ * SMALBB Rd, Rs1, Rs2 * SMALBT Rd, Rs1, Rs2 * SMALTT Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the signed 16-bit content of the 32-bit elements of a register with the 16-bit * content of the corresponding 32-bit elements of another register and add the results with a 64-bit * value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is written back * to the register-pair (RV32) or the register (RV64). * * SMALBB: rt pair + bottom*bottom (all 32-bit elements) * * SMALBT rt pair + bottom*top (all 32-bit elements) * * SMALTT rt pair + top*top (all 32-bit elements) * * **RV32 Description**:\n * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit * content of Rs2. * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit * content of Rs2. 
* For the `SMALTT` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content * of Rs2. * The multiplication result is added with the 64-bit value of an even/odd pair of registers specified by * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and * Rs2, and the 64-bit value of the register-pair are treated as signed integers. * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair * includes register 2d and 2d+1. * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d` * register of the pair contains the low 32-bit of the operand. * * **RV64 Description**:\n * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 * with the bottom 16-bit content of the 32-bit elements of Rs2. * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 * with the top 16-bit content of the 32-bit elements of Rs2. * For the `SMALTT` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with * the top 16-bit content of the 32-bit elements of Rs2. * The multiplication results are added with the 64-bit value of Rd. The 64-bit addition result is written * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed * integers. 
* * **Operations**:\n * ~~~ * RV32: * Mres[31:0] = Rs1.H[0] * Rs2.H[0]; // SMALBB * Mres[31:0] = Rs1.H[0] * Rs2.H[1]; // SMALBT * Mres[31:0] = Rs1.H[1] * Rs2.H[1]; // SMALTT * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1); * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]); * RV64: * // SMALBB * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[0]; * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[0]; * // SMALBT * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[1]; * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[1]; * // SMALTT * Mres[0][31:0] = Rs1.W[0].H[1] * Rs2.W[0].H[1]; * Mres[1][31:0] = Rs1.W[1].H[1] * Rs2.W[1].H[1]; * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]); * ~~~ * * \param [in] t long long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_SMALBB(long long t, unsigned long a, unsigned long b) { __ASM volatile("smalbb %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 3.105.1. SMALBB ===== */ /* ===== Inline Function Start for 3.105.2. SMALBT ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB * \brief SMALBT (Signed Multiply Bottom Half & Top Half & Add 64-bit) * \details * **Type**: DSP (64-bit Profile) * * **Syntax**:\n * ~~~ * SMALBB Rd, Rs1, Rs2 * SMALBT Rd, Rs1, Rs2 * SMALTT Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the signed 16-bit content of the 32-bit elements of a register with the 16-bit * content of the corresponding 32-bit elements of another register and add the results with a 64-bit * value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is written back * to the register-pair (RV32) or the register (RV64). 
* * SMALBB: rt pair + bottom*bottom (all 32-bit elements) * * SMALBT rt pair + bottom*top (all 32-bit elements) * * SMALTT rt pair + top*top (all 32-bit elements) * * **RV32 Description**:\n * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit * content of Rs2. * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit * content of Rs2. * For the `SMALTT` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content * of Rs2. * The multiplication result is added with the 64-bit value of an even/odd pair of registers specified by * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and * Rs2, and the 64-bit value of the register-pair are treated as signed integers. * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair * includes register 2d and 2d+1. * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d` * register of the pair contains the low 32-bit of the operand. * * **RV64 Description**:\n * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 * with the bottom 16-bit content of the 32-bit elements of Rs2. * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 * with the top 16-bit content of the 32-bit elements of Rs2. * For the `SMALTT` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with * the top 16-bit content of the 32-bit elements of Rs2. * The multiplication results are added with the 64-bit value of Rd. The 64-bit addition result is written * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed * integers. 
* * **Operations**:\n * ~~~ * RV32: * Mres[31:0] = Rs1.H[0] * Rs2.H[0]; // SMALBB * Mres[31:0] = Rs1.H[0] * Rs2.H[1]; // SMALBT * Mres[31:0] = Rs1.H[1] * Rs2.H[1]; // SMALTT * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1); * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]); * RV64: * // SMALBB * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[0]; * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[0]; * // SMALBT * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[1]; * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[1]; * // SMALTT * Mres[0][31:0] = Rs1.W[0].H[1] * Rs2.W[0].H[1]; * Mres[1][31:0] = Rs1.W[1].H[1] * Rs2.W[1].H[1]; * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]); * ~~~ * * \param [in] t long long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_SMALBT(long long t, unsigned long a, unsigned long b) { __ASM volatile("smalbt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 3.105.2. SMALBT ===== */ /* ===== Inline Function Start for 3.105.3. SMALTT ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB * \brief SMALTT (Signed Multiply Top Halfs & Add 64-bit) * \details * **Type**: DSP (64-bit Profile) * * **Syntax**:\n * ~~~ * SMALBB Rd, Rs1, Rs2 * SMALBT Rd, Rs1, Rs2 * SMALTT Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the signed 16-bit content of the 32-bit elements of a register with the 16-bit * content of the corresponding 32-bit elements of another register and add the results with a 64-bit * value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is written back * to the register-pair (RV32) or the register (RV64). 
* * SMALBB: rt pair + bottom*bottom (all 32-bit elements) * * SMALBT rt pair + bottom*top (all 32-bit elements) * * SMALTT rt pair + top*top (all 32-bit elements) * * **RV32 Description**:\n * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit * content of Rs2. * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit * content of Rs2. * For the `SMALTT` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content * of Rs2. * The multiplication result is added with the 64-bit value of an even/odd pair of registers specified by * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and * Rs2, and the 64-bit value of the register-pair are treated as signed integers. * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair * includes register 2d and 2d+1. * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d` * register of the pair contains the low 32-bit of the operand. * * **RV64 Description**:\n * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 * with the bottom 16-bit content of the 32-bit elements of Rs2. * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 * with the top 16-bit content of the 32-bit elements of Rs2. * For the `SMALTT` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with * the top 16-bit content of the 32-bit elements of Rs2. * The multiplication results are added with the 64-bit value of Rd. The 64-bit addition result is written * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed * integers. 
* * **Operations**:\n * ~~~ * RV32: * Mres[31:0] = Rs1.H[0] * Rs2.H[0]; // SMALBB * Mres[31:0] = Rs1.H[0] * Rs2.H[1]; // SMALBT * Mres[31:0] = Rs1.H[1] * Rs2.H[1]; // SMALTT * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1); * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]); * RV64: * // SMALBB * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[0]; * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[0]; * // SMALBT * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[1]; * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[1]; * // SMALTT * Mres[0][31:0] = Rs1.W[0].H[1] * Rs2.W[0].H[1]; * Mres[1][31:0] = Rs1.W[1].H[1] * Rs2.W[1].H[1]; * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]); * ~~~ * * \param [in] t long long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_SMALTT(long long t, unsigned long a, unsigned long b) { __ASM volatile("smaltt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 3.105.3. SMALTT ===== */ /* ===== Inline Function Start for 3.106.1. SMALDA ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB * \brief SMALDA (Signed Multiply Two Halfs and Two Adds 64-bit) * \details * **Type**: DSP (64-bit Profile) * * **Syntax**:\n * ~~~ * SMALDA Rd, Rs1, Rs2 * SMALXDA Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then * adds the two 32-bit results and the 64-bit value of an even/odd pair of registers together. 
* * SMALDA: rt pair+ top*top + bottom*bottom (all 32-bit elements) * * SMALXDA: rt pair+ top*bottom + bottom*top (all 32-bit elements) * * **RV32 Description**:\n * For the `SMALDA` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit * content of Rs2 and then adds the result to the result of multiplying the top 16-bit content of Rs1 with * the top 16-bit content of Rs2 with unlimited precision. * For the `SMALXDA` instruction, it multiplies the top 16-bit content of Rs1 with the bottom 16-bit * content of Rs2 and then adds the result to the result of multiplying the bottom 16-bit content of Rs1 * with the top 16-bit content of Rs2 with unlimited precision. * The result is added to the 64-bit value of an even/odd pair of registers specified by Rd(4,1). The 64- * bit addition result is written back to the register-pair. The 16-bit values of Rs1 and Rs2, and the 64- * bit value of the register-pair are treated as signed integers. * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair * includes register 2d and 2d+1. * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d` * register of the pair contains the low 32-bit of the operand. * * **RV64 Description**:\n * For the `SMALDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32- * bit elements of Rs2 with unlimited precision. 
* For the `SMALXDA` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the * 32-bit elements of Rs2 with unlimited precision. * The results are added to the 64-bit value of Rd. The 64-bit addition result is written back to Rd. The * 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed integers. * * **Operations**:\n * ~~~ * RV32: * // SMALDA * Mres0[31:0] = (Rs1.H[0] * Rs2.H[0]); * Mres1[31:0] = (Rs1.H[1] * Rs2.H[1]); * // SMALXDA * Mres0[31:0] = (Rs1.H[0] * Rs2.H[1]); * Mres1[31:0] = (Rs1.H[1] * Rs2.H[0]); * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1); * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres0[31:0]) + SE64(Mres1[31:0]); * RV64: * // SMALDA * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]); * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]); * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]); * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]); * // SMALXDA * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[1]); * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]); * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[1]); * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]); * Rd = Rd + SE64(Mres0[0][31:0]) + SE64(Mres1[0][31:0]) + SE64(Mres0[1][31:0]) + * SE64(Mres1[1][31:0]); * ~~~ * * \param [in] t long long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_SMALDA(long long t, unsigned long a, unsigned long b) { __ASM volatile("smalda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 3.106.1. SMALDA ===== */ /* ===== Inline Function Start for 3.106.2. 
SMALXDA ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB * \brief SMALXDA (Signed Crossed Multiply Two Halfs and Two Adds 64-bit) * \details * **Type**: DSP (64-bit Profile) * * **Syntax**:\n * ~~~ * SMALDA Rd, Rs1, Rs2 * SMALXDA Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then * adds the two 32-bit results and the 64-bit value of an even/odd pair of registers together. * * SMALDA: rt pair+ top*top + bottom*bottom (all 32-bit elements) * * SMALXDA: rt pair+ top*bottom + bottom*top (all 32-bit elements) * * **RV32 Description**:\n * For the `SMALDA` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit * content of Rs2 and then adds the result to the result of multiplying the top 16-bit content of Rs1 with * the top 16-bit content of Rs2 with unlimited precision. * For the `SMALXDA` instruction, it multiplies the top 16-bit content of Rs1 with the bottom 16-bit * content of Rs2 and then adds the result to the result of multiplying the bottom 16-bit content of Rs1 * with the top 16-bit content of Rs2 with unlimited precision. * The result is added to the 64-bit value of an even/odd pair of registers specified by Rd(4,1). The 64- * bit addition result is written back to the register-pair. The 16-bit values of Rs1 and Rs2, and the 64- * bit value of the register-pair are treated as signed integers. * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair * includes register 2d and 2d+1. * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d` * register of the pair contains the low 32-bit of the operand. 
* * **RV64 Description**:\n * For the `SMALDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32- * bit elements of Rs2 with unlimited precision. * For the `SMALXDA` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the * 32-bit elements of Rs2 with unlimited precision. * The results are added to the 64-bit value of Rd. The 64-bit addition result is written back to Rd. The * 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed integers. * * **Operations**:\n * ~~~ * RV32: * // SMALDA * Mres0[31:0] = (Rs1.H[0] * Rs2.H[0]); * Mres1[31:0] = (Rs1.H[1] * Rs2.H[1]); * // SMALXDA * Mres0[31:0] = (Rs1.H[0] * Rs2.H[1]); * Mres1[31:0] = (Rs1.H[1] * Rs2.H[0]); * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1); * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres0[31:0]) + SE64(Mres1[31:0]); * RV64: * // SMALDA * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]); * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]); * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]); * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]); * // SMALXDA * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[1]); * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]); * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[1]); * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]); * Rd = Rd + SE64(Mres0[0][31:0]) + SE64(Mres1[0][31:0]) + SE64(Mres0[1][31:0]) + * SE64(Mres1[1][31:0]); * ~~~ * * \param [in] t long long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long 
type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_SMALXDA(long long t, unsigned long a, unsigned long b)
{
    /* Local copy of the 64-bit running sum; "+r" makes it both input and output operand. */
    long long acc = t;

    __ASM volatile("smalxda %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.106.2. SMALXDA ===== */

/* ===== Inline Function Start for 3.107.1. SMALDS ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
 * \brief SMALDS (Signed Multiply Two Halfs & Subtract & Add 64-bit)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * SMALDS Rd, Rs1, Rs2
 * SMALDRS Rd, Rs1, Rs2
 * SMALXDS Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
 * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to
 * the 64-bit value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is
 * written back to the register-pair.
 * * SMALDS: rt pair + (top*top - bottom*bottom) (all 32-bit elements)
 * * SMALDRS: rt pair + (bottom*bottom - top*top) (all 32-bit elements)
 * * SMALXDS: rt pair + (top*bottom - bottom*top) (all 32-bit elements)
 *
 * **RV32 Description**:\n
 * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
 * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
 * Rs1 with the top 16-bit content of Rs2.
 * For the `SMALDRS` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content
 * of Rs2 and then subtracts the result from the result of multiplying the bottom 16-bit content of Rs1
 * with the bottom 16-bit content of Rs2.
 * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit
 * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
 * Rs1 with the bottom 16-bit content of Rs2.
* The subtraction result is then added to the 64-bit value of an even/odd pair of registers specified by * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and * Rs2, and the 64-bit value of the register-pair are treated as signed integers. * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair * includes register 2d and 2d+1. * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d` * register of the pair contains the low 32-bit of the operand. * * **RV64 Description**:\n * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 * with the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content * of the 32-bit elements of Rs2. * For the `SMALDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of * the 32-bit elements of Rs2. * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit * content of the 32-bit elements of Rs2. * The subtraction results are then added to the 64-bit value of Rd. The 64-bit addition result is written * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed * integers. 
* * **Operations**:\n * ~~~ * * RV32: * Mres[31:0] = (Rs1.H[1] * Rs2.H[1]) - (Rs1.H[0] * Rs2.H[0]); // SMALDS * Mres[31:0] = (Rs1.H[0] * Rs2.H[0]) - (Rs1.H[1] * Rs2.H[1]); // SMALDRS * Mres[31:0] = (Rs1.H[1] * Rs2.H[0]) - (Rs1.H[0] * Rs2.H[1]); // SMALXDS * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1); * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]); * * RV64: * // SMALDS * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]) - (Rs1.W[0].H[0] * Rs2.W[0].H[0]); * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[0].H[1]) - (Rs1.W[1].H[0] * Rs2.W[1].H[0]); * // SMALDRS * Mres[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]) - (Rs1.W[0].H[1] * Rs2.W[0].H[1]); * Mres[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[0].H[0]) - (Rs1.W[1].H[1] * Rs2.W[1].H[1]); * // SMALXDS * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]) - (Rs1.W[0].H[0] * Rs2.W[0].H[1]); * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[0].H[0]) - (Rs1.W[1].H[0] * Rs2.W[1].H[1]); * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]); * ~~~ * * \param [in] t long long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_SMALDS(long long t, unsigned long a, unsigned long b) { __ASM volatile("smalds %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 3.107.1. SMALDS ===== */ /* ===== Inline Function Start for 3.107.2. SMALDRS ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB * \brief SMALDRS (Signed Multiply Two Halfs & Reverse Subtract & Add 64- bit) * \details * **Type**: DSP (64-bit Profile) * * **Syntax**:\n * ~~~ * SMALDS Rd, Rs1, Rs2 * SMALDRS Rd, Rs1, Rs2 * SMALXDS Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then * perform a subtraction operation between the two 32-bit results. 
Then add the subtraction result to * the 64-bit value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is * written back to the register-pair. * * SMALDS: rt pair + (top*top - bottom*bottom) (all 32-bit elements) * * SMALDRS: rt pair + (bottom*bottom - top*top) (all 32-bit elements) * * SMALXDS: rt pair + (top*bottom - bottom*top) (all 32-bit elements) * * **RV32 Description**:\n * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of * Rs1 with the top 16-bit content of Rs2. * For the `SMALDRS` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content * of Rs2 and then subtracts the result from the result of multiplying the bottom 16-bit content of Rs1 * with the bottom 16-bit content of Rs2. * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of * Rs1 with the bottom 16-bit content of Rs2. * The subtraction result is then added to the 64-bit value of an even/odd pair of registers specified by * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and * Rs2, and the 64-bit value of the register-pair are treated as signed integers. * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair * includes register 2d and 2d+1. * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d` * register of the pair contains the low 32-bit of the operand. 
* * **RV64 Description**:\n * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 * with the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content * of the 32-bit elements of Rs2. * For the `SMALDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of * the 32-bit elements of Rs2. * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit * content of the 32-bit elements of Rs2. * The subtraction results are then added to the 64-bit value of Rd. The 64-bit addition result is written * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed * integers. 
* * **Operations**:\n * ~~~ * * RV32: * Mres[31:0] = (Rs1.H[1] * Rs2.H[1]) - (Rs1.H[0] * Rs2.H[0]); // SMALDS * Mres[31:0] = (Rs1.H[0] * Rs2.H[0]) - (Rs1.H[1] * Rs2.H[1]); // SMALDRS * Mres[31:0] = (Rs1.H[1] * Rs2.H[0]) - (Rs1.H[0] * Rs2.H[1]); // SMALXDS * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1); * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]); * * RV64: * // SMALDS * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]) - (Rs1.W[0].H[0] * Rs2.W[0].H[0]); * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[0].H[1]) - (Rs1.W[1].H[0] * Rs2.W[1].H[0]); * // SMALDRS * Mres[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]) - (Rs1.W[0].H[1] * Rs2.W[0].H[1]); * Mres[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[0].H[0]) - (Rs1.W[1].H[1] * Rs2.W[1].H[1]); * // SMALXDS * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]) - (Rs1.W[0].H[0] * Rs2.W[0].H[1]); * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[0].H[0]) - (Rs1.W[1].H[0] * Rs2.W[1].H[1]); * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]); * ~~~ * * \param [in] t long long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_SMALDRS(long long t, unsigned long a, unsigned long b) { __ASM volatile("smaldrs %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 3.107.2. SMALDRS ===== */ /* ===== Inline Function Start for 3.107.3. SMALXDS ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB * \brief SMALXDS (Signed Crossed Multiply Two Halfs & Subtract & Add 64- bit) * \details * **Type**: DSP (64-bit Profile) * * **Syntax**:\n * ~~~ * SMALDS Rd, Rs1, Rs2 * SMALDRS Rd, Rs1, Rs2 * SMALXDS Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then * perform a subtraction operation between the two 32-bit results. 
Then add the subtraction result to * the 64-bit value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is * written back to the register-pair. * * SMALDS: rt pair + (top*top - bottom*bottom) (all 32-bit elements) * * SMALDRS: rt pair + (bottom*bottom - top*top) (all 32-bit elements) * * SMALXDS: rt pair + (top*bottom - bottom*top) (all 32-bit elements) * * **RV32 Description**:\n * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of * Rs1 with the top 16-bit content of Rs2. * For the `SMALDRS` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content * of Rs2 and then subtracts the result from the result of multiplying the bottom 16-bit content of Rs1 * with the bottom 16-bit content of Rs2. * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of * Rs1 with the bottom 16-bit content of Rs2. * The subtraction result is then added to the 64-bit value of an even/odd pair of registers specified by * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and * Rs2, and the 64-bit value of the register-pair are treated as signed integers. * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair * includes register 2d and 2d+1. * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d` * register of the pair contains the low 32-bit of the operand. 
* * **RV64 Description**:\n * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 * with the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content * of the 32-bit elements of Rs2. * For the `SMALDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of * the 32-bit elements of Rs2. * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit * content of the 32-bit elements of Rs2. * The subtraction results are then added to the 64-bit value of Rd. The 64-bit addition result is written * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed * integers. 
* * **Operations**:\n * ~~~ * * RV32: * Mres[31:0] = (Rs1.H[1] * Rs2.H[1]) - (Rs1.H[0] * Rs2.H[0]); // SMALDS * Mres[31:0] = (Rs1.H[0] * Rs2.H[0]) - (Rs1.H[1] * Rs2.H[1]); // SMALDRS * Mres[31:0] = (Rs1.H[1] * Rs2.H[0]) - (Rs1.H[0] * Rs2.H[1]); // SMALXDS * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1); * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]); * * RV64: * // SMALDS * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]) - (Rs1.W[0].H[0] * Rs2.W[0].H[0]); * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[0].H[1]) - (Rs1.W[1].H[0] * Rs2.W[1].H[0]); * // SMALDRS * Mres[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]) - (Rs1.W[0].H[1] * Rs2.W[0].H[1]); * Mres[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[0].H[0]) - (Rs1.W[1].H[1] * Rs2.W[1].H[1]); * // SMALXDS * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]) - (Rs1.W[0].H[0] * Rs2.W[0].H[1]); * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[0].H[0]) - (Rs1.W[1].H[0] * Rs2.W[1].H[1]); * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]); * ~~~ * * \param [in] t long long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_SMALXDS(long long t, unsigned long a, unsigned long b) { __ASM volatile("smalxds %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 3.107.3. SMALXDS ===== */ /* ===== Inline Function Start for 3.108. SMAR64 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB * \brief SMAR64 (Signed Multiply and Add to 64-Bit Data) * \details * **Type**: DSP (64-bit Profile) * * **Syntax**:\n * ~~~ * SMAR64 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the 32-bit signed elements in two registers and add the 64-bit multiplication * result to the 64-bit signed data of a pair of registers (RV32) or a register (RV64). The result is written * back to the pair of registers (RV32) or a register (RV64). 
* * **RV32 Description**:\n * This instruction multiplies the 32-bit signed data of Rs1 with that of Rs2. It adds * the 64-bit multiplication result to the 64-bit signed data of an even/odd pair of registers specified by * Rd(4,1). The addition result is written back to the even/odd pair of registers specified by Rd(4,1). * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair * includes register 2d and 2d+1. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register * of the pair contains the low 32-bit of the result. * * **RV64 Description**:\n * This instruction multiplies the 32-bit signed elements of Rs1 with that of Rs2. It * adds the 64-bit multiplication results to the 64-bit signed data of Rd. The addition result is written * back to Rd. * * **Operations**:\n * ~~~ * * RV32: * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1); * R[t_H].R[t_L] = R[t_H].R[t_L] + (Rs1 * Rs2); * * RV64: * Rd = Rd + (Rs1.W[0] * Rs2.W[0]) + (Rs1.W[1] * Rs2.W[1]); * ~~~ * * \param [in] t long long type of value stored in t * \param [in] a long type of value stored in a * \param [in] b long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_SMAR64(long long t, long a, long b) { __ASM volatile("smar64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 3.108. SMAR64 ===== */ /* ===== Inline Function Start for 3.109. SMAQA ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_8B_MULT_32B_ADD * \brief SMAQA (Signed Multiply Four Bytes with 32-bit Adds) * \details * **Type**: Partial-SIMD (Reduction) * * **Syntax**:\n * ~~~ * SMAQA Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do four signed 8-bit multiplications from 32-bit chunks of two registers; and then adds * the four 16-bit results and the content of corresponding 32-bit chunks of a third register together. 
* * **Description**:\n * This instruction multiplies the four signed 8-bit elements of 32-bit chunks of Rs1 with the four * signed 8-bit elements of 32-bit chunks of Rs2 and then adds the four results together with the signed * content of the corresponding 32-bit chunks of Rd. The final results are written back to the * corresponding 32-bit chunks in Rd. * * **Operations**:\n * ~~~ * res[x] = Rd.W[x] + * (Rs1.W[x].B[3] s* Rs2.W[x].B[3]) + (Rs1.W[x].B[2] s* Rs2.W[x].B[2]) + * (Rs1.W[x].B[1] s* Rs2.W[x].B[1]) + (Rs1.W[x].B[0] s* Rs2.W[x].B[0]); * Rd.W[x] = res[x]; * for RV32: x=0, * for RV64: x=1,0 * ~~~ * * \param [in] t long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_SMAQA(long t, unsigned long a, unsigned long b) { __ASM volatile("smaqa %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 3.109. SMAQA ===== */ /* ===== Inline Function Start for 3.110. SMAQA.SU ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_8B_MULT_32B_ADD * \brief SMAQA.SU (Signed and Unsigned Multiply Four Bytes with 32-bit Adds) * \details * **Type**: Partial-SIMD (Reduction) * * **Syntax**:\n * ~~~ * SMAQA.SU Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do four `signed x unsigned` 8-bit multiplications from 32-bit chunks of two registers; and * then adds the four 16-bit results and the content of corresponding 32-bit chunks of a third register * together. * * **Description**:\n * This instruction multiplies the four signed 8-bit elements of 32-bit chunks of Rs1 with the four * unsigned 8-bit elements of 32-bit chunks of Rs2 and then adds the four results together with the * signed content of the corresponding 32-bit chunks of Rd. The final results are written back to the * corresponding 32-bit chunks in Rd. 
* * **Operations**:\n * ~~~ * res[x] = Rd.W[x] + * (Rs1.W[x].B[3] su* Rs2.W[x].B[3]) + (Rs1.W[x].B[2] su* Rs2.W[x].B[2]) + * (Rs1.W[x].B[1] su* Rs2.W[x].B[1]) + (Rs1.W[x].B[0] su* Rs2.W[x].B[0]); * Rd.W[x] = res[x]; * for RV32: x=0, * for RV64: x=1...0 * ~~~ * * \param [in] t long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_SMAQA_SU(long t, unsigned long a, unsigned long b) { __ASM volatile("smaqa.su %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 3.110. SMAQA.SU ===== */ /* ===== Inline Function Start for 3.111. SMAX8 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC * \brief SMAX8 (SIMD 8-bit Signed Maximum) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * SMAX8 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 8-bit signed integer elements finding maximum operations simultaneously. * * **Description**:\n * This instruction compares the 8-bit signed integer elements in Rs1 with the 8-bit * signed integer elements in Rs2 and selects the numbers that is greater than the other one. The * selected results are written to Rd. * * **Operations**:\n * ~~~ * Rd.B[x] = (Rs1.B[x] > Rs2.B[x])? Rs1.B[x] : Rs2.B[x]; * for RV32: x=3...0, * for RV64: x=7...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_SMAX8(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("smax8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.111. SMAX8 ===== */ /* ===== Inline Function Start for 3.112. 
SMAX16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
 * \brief SMAX16 (SIMD 16-bit Signed Maximum)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMAX16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer elements finding maximum operations simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 16-bit signed integer elements in Rs1 with the 16-bit
 * signed integer elements in Rs2 and selects the number that is greater than the other one. The
 * selected results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = (Rs1.H[x] > Rs2.H[x])? Rs1.H[x] : Rs2.H[x];
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in] a unsigned long type of value stored in a
 * \param [in] b unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SMAX16(unsigned long a, unsigned long b)
{
    unsigned long rd;   /* write-only ("=r") destination of the instruction */

    __ASM volatile("smax16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.112. SMAX16 ===== */

/* ===== Inline Function Start for 3.113.1. SMBB16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
 * \brief SMBB16 (SIMD Signed Multiply Bottom Half & Bottom Half)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMBB16 Rd, Rs1, Rs2
 * SMBT16 Rd, Rs1, Rs2
 * SMTT16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 16-bit content of the 32-bit elements of a register with the signed
 * 16-bit content of the 32-bit elements of another register and write the result to a third register.
 * * SMBB16: W[x].bottom * W[x].bottom
 * * SMBT16: W[x].bottom * W[x].top
 * * SMTT16: W[x].top * W[x].top
 *
 * **Description**:\n
 * For the `SMBB16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2.
* For the `SMBT16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 * with the top 16-bit content of the 32-bit elements of Rs2. * For the `SMTT16` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with * the top 16-bit content of the 32-bit elements of Rs2. * The multiplication results are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed * integers. * * **Operations**:\n * ~~~ * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[0]; // SMBB16 * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[1]; // SMBT16 * Rd.W[x] = Rs1.W[x].H[1] * Rs2.W[x].H[1]; // SMTT16 * for RV32: x=0, * for RV64: x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_SMBB16(unsigned long a, unsigned long b) { long result; __ASM volatile("smbb16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.113.1. SMBB16 ===== */ /* ===== Inline Function Start for 3.113.2. SMBT16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB * \brief SMBT16 (SIMD Signed Multiply Bottom Half & Top Half) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * SMBB16 Rd, Rs1, Rs2 * SMBT16 Rd, Rs1, Rs2 * SMTT16 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the signed 16-bit content of the 32-bit elements of a register with the signed 16- * bit content of the 32-bit elements of another register and write the result to a third register. * * SMBB16: W[x].bottom*W[x].bottom * * SMBT16: W[x].bottom *W[x].top * * SMTT16: W[x].top * W[x].top * * **Description**:\n * For the `SMBB16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 * with the bottom 16-bit content of the 32-bit elements of Rs2. 
* For the `SMBT16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 * with the top 16-bit content of the 32-bit elements of Rs2. * For the `SMTT16` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with * the top 16-bit content of the 32-bit elements of Rs2. * The multiplication results are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed * integers. * * **Operations**:\n * ~~~ * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[0]; // SMBB16 * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[1]; // SMBT16 * Rd.W[x] = Rs1.W[x].H[1] * Rs2.W[x].H[1]; // SMTT16 * for RV32: x=0, * for RV64: x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_SMBT16(unsigned long a, unsigned long b) { long result; __ASM volatile("smbt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.113.2. SMBT16 ===== */ /* ===== Inline Function Start for 3.113.3. SMTT16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB * \brief SMTT16 (SIMD Signed Multiply Top Half & Top Half) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * SMBB16 Rd, Rs1, Rs2 * SMBT16 Rd, Rs1, Rs2 * SMTT16 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the signed 16-bit content of the 32-bit elements of a register with the signed 16- * bit content of the 32-bit elements of another register and write the result to a third register. * * SMBB16: W[x].bottom*W[x].bottom * * SMBT16: W[x].bottom *W[x].top * * SMTT16: W[x].top * W[x].top * * **Description**:\n * For the `SMBB16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 * with the bottom 16-bit content of the 32-bit elements of Rs2. 
* For the `SMBT16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 * with the top 16-bit content of the 32-bit elements of Rs2. * For the `SMTT16` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with * the top 16-bit content of the 32-bit elements of Rs2. * The multiplication results are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed * integers. * * **Operations**:\n * ~~~ * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[0]; // SMBB16 * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[1]; // SMBT16 * Rd.W[x] = Rs1.W[x].H[1] * Rs2.W[x].H[1]; // SMTT16 * for RV32: x=0, * for RV64: x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_SMTT16(unsigned long a, unsigned long b) { long result; __ASM volatile("smtt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.113.3. SMTT16 ===== */ /* ===== Inline Function Start for 3.114.1. SMDS ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB * \brief SMDS (SIMD Signed Multiply Two Halfs and Subtract) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * SMDS Rd, Rs1, Rs2 * SMDRS Rd, Rs1, Rs2 * SMXDS Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then * perform a subtraction operation between the two 32-bit results. 
* * SMDS: top*top - bottom*bottom (per 32-bit element) * * SMDRS: bottom*bottom - top*top (per 32-bit element) * * SMXDS: top*bottom - bottom*top (per 32-bit element) * * **Description**:\n * For the `SMDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with * the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result * of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the * 32-bit elements of Rs2. * For the `SMDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of * the 32-bit elements of Rs2. * For the `SMXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit * content of the 32-bit elements of Rs2. * The subtraction result is written to the corresponding 32-bit element of Rd. The 16-bit contents of * multiplication are treated as signed integers. 
* * **Operations**:\n * ~~~ * * SMDS: * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]); * * SMDRS: * Rd.W[x] = (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]); * * SMXDS: * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]); * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_SMDS(unsigned long a, unsigned long b) { long result; __ASM volatile("smds %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.114.1. SMDS ===== */ /* ===== Inline Function Start for 3.114.2. SMDRS ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB * \brief SMDRS (SIMD Signed Multiply Two Halfs and Reverse Subtract) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * SMDS Rd, Rs1, Rs2 * SMDRS Rd, Rs1, Rs2 * SMXDS Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then * perform a subtraction operation between the two 32-bit results. * * SMDS: top*top - bottom*bottom (per 32-bit element) * * SMDRS: bottom*bottom - top*top (per 32-bit element) * * SMXDS: top*bottom - bottom*top (per 32-bit element) * * **Description**:\n * For the `SMDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with * the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result * of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the * 32-bit elements of Rs2. 
* For the `SMDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of * the 32-bit elements of Rs2. * For the `SMXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit * content of the 32-bit elements of Rs2. * The subtraction result is written to the corresponding 32-bit element of Rd. The 16-bit contents of * multiplication are treated as signed integers. * * **Operations**:\n * ~~~ * * SMDS: * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]); * * SMDRS: * Rd.W[x] = (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]); * * SMXDS: * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]); * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_SMDRS(unsigned long a, unsigned long b) { long result; __ASM volatile("smdrs %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.114.2. SMDRS ===== */ /* ===== Inline Function Start for 3.114.3. 
SMXDS ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB * \brief SMXDS (SIMD Signed Crossed Multiply Two Halfs and Subtract) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * SMDS Rd, Rs1, Rs2 * SMDRS Rd, Rs1, Rs2 * SMXDS Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then * perform a subtraction operation between the two 32-bit results. * * SMDS: top*top - bottom*bottom (per 32-bit element) * * SMDRS: bottom*bottom - top*top (per 32-bit element) * * SMXDS: top*bottom - bottom*top (per 32-bit element) * * **Description**:\n * For the `SMDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with * the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result * of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the * 32-bit elements of Rs2. * For the `SMDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of * the 32-bit elements of Rs2. * For the `SMXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit * content of the 32-bit elements of Rs2. * The subtraction result is written to the corresponding 32-bit element of Rd. The 16-bit contents of * multiplication are treated as signed integers. 
* * **Operations**:\n * ~~~ * * SMDS: * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]); * * SMDRS: * Rd.W[x] = (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]); * * SMXDS: * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]); * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_SMXDS(unsigned long a, unsigned long b) { long result; __ASM volatile("smxds %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.114.3. SMXDS ===== */ /* ===== Inline Function Start for 3.115. SMIN8 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC * \brief SMIN8 (SIMD 8-bit Signed Minimum) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * SMIN8 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 8-bit signed integer elements finding minimum operations simultaneously. * * **Description**:\n * This instruction compares the 8-bit signed integer elements in Rs1 with the 8-bit * signed integer elements in Rs2 and selects the numbers that is less than the other one. The selected * results are written to Rd. * * **Operations**:\n * ~~~ * Rd.B[x] = (Rs1.B[x] < Rs2.B[x])? Rs1.B[x] : Rs2.B[x]; * for RV32: x=3...0, * for RV64: x=7...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_SMIN8(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("smin8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.115. SMIN8 ===== */ /* ===== Inline Function Start for 3.116. 
SMIN16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
 * \brief SMIN16 (SIMD 16-bit Signed Minimum)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMIN16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer elements finding minimum operations simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 16-bit signed integer elements in Rs1 with the 16-bit
 * signed integer elements in Rs2 and selects the number that is less than the other one. The selected
 * results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = (Rs1.H[x] < Rs2.H[x])? Rs1.H[x] : Rs2.H[x];
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long value supplied to the instruction as Rs1
 * \param [in]  b    unsigned long value supplied to the instruction as Rs2
 * \return unsigned long value the instruction leaves in Rd
 */
__STATIC_FORCEINLINE unsigned long __RV_SMIN16(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* %0 is the write-only destination; %1 and %2 are the inputs a and b. */
    __ASM volatile(
        "smin16 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );

    return rd;
}
/* ===== Inline Function End for 3.116. SMIN16 ===== */

/* ===== Inline Function Start for 3.117.1. SMMUL ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
 * \brief SMMUL (SIMD MSW Signed Multiply Word)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMMUL Rd, Rs1, Rs2
 * SMMUL.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the 32-bit signed integer elements of two registers and write the most significant
 * 32-bit results to the corresponding 32-bit elements of a register. The `.u` form performs an
 * additional rounding up operation on the multiplication results before taking the most significant
 * 32-bit part of the results.
 *
 * **Description**:\n
 * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2 and writes the
 * most significant 32-bit multiplication results to the corresponding 32-bit elements of Rd. The 32-bit
 * elements of Rs1 and Rs2 are treated as signed integers.
The `.u` form of the instruction rounds up * the most significant 32-bit of the 64-bit multiplication results by adding a 1 to bit 31 of the results. * * For `smmul/RV32` instruction, it is an alias to `mulh/RV32` instruction. * * **Operations**:\n * ~~~ * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x]; * if (`.u` form) { * Round[x][32:0] = Mres[x][63:31] + 1; * Rd.W[x] = Round[x][32:1]; * } else { * Rd.W[x] = Mres[x][63:32]; * } * for RV32: x=0 * for RV64: x=1...0 * ~~~ * * \param [in] a long type of value stored in a * \param [in] b long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_SMMUL(long a, long b) { long result; __ASM volatile("smmul %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.117.1. SMMUL ===== */ /* ===== Inline Function Start for 3.117.2. SMMUL.u ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC * \brief SMMUL.u (SIMD MSW Signed Multiply Word with Rounding) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * SMMUL Rd, Rs1, Rs2 * SMMUL.u Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the 32-bit signed integer elements of two registers and write the most significant * 32-bit results to the corresponding 32-bit elements of a register. The `.u` form performs an * additional rounding up operation on the multiplication results before taking the most significant * 32-bit part of the results. * * **Description**:\n * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2 and writes the * most significant 32-bit multiplication results to the corresponding 32-bit elements of Rd. The 32-bit * elements of Rs1 and Rs2 are treated as signed integers. The `.u` form of the instruction rounds up * the most significant 32-bit of the 64-bit multiplication results by adding a 1 to bit 31 of the results. * * For `smmul/RV32` instruction, it is an alias to `mulh/RV32` instruction. 
* * **Operations**:\n * ~~~ * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x]; * if (`.u` form) { * Round[x][32:0] = Mres[x][63:31] + 1; * Rd.W[x] = Round[x][32:1]; * } else { * Rd.W[x] = Mres[x][63:32]; * } * for RV32: x=0 * for RV64: x=1...0 * ~~~ * * \param [in] a long type of value stored in a * \param [in] b long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_SMMUL_U(long a, long b) { long result; __ASM volatile("smmul.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.117.2. SMMUL.u ===== */ /* ===== Inline Function Start for 3.118.1. SMMWB ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC * \brief SMMWB (SIMD MSW Signed Multiply Word and Bottom Half) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * SMMWB Rd, Rs1, Rs2 * SMMWB.u Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the * corresponding 32-bit elements of another register, and write the most significant 32-bit results to * the corresponding 32-bit elements of a register. The `.u` form rounds up the results from the most * significant discarded bit. * * **Description**:\n * This instruction multiplies the signed 32-bit elements of Rs1 with the signed bottom 16-bit content * of the corresponding 32-bit elements of Rs2 and writes the most significant 32-bit multiplication * results to the corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the * most significant 32-bit of the 48-bit multiplication results by adding a 1 to bit 15 of the results. 
* * **Operations**:\n * ~~~ * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[0]; * if (`.u` form) { * Round[x][32:0] = Mres[x][47:15] + 1; * Rd.W[x] = Round[x][32:1]; * } else { * Rd.W[x] = Mres[x][47:16]; * } * for RV32: x=0 * for RV64: x=1...0 * ~~~ * * \param [in] a long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_SMMWB(long a, unsigned long b) { long result; __ASM volatile("smmwb %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.118.1. SMMWB ===== */ /* ===== Inline Function Start for 3.118.2. SMMWB.u ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC * \brief SMMWB.u (SIMD MSW Signed Multiply Word and Bottom Half with Rounding) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * SMMWB Rd, Rs1, Rs2 * SMMWB.u Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the * corresponding 32-bit elements of another register, and write the most significant 32-bit results to * the corresponding 32-bit elements of a register. The `.u` form rounds up the results from the most * significant discarded bit. * * **Description**:\n * This instruction multiplies the signed 32-bit elements of Rs1 with the signed bottom 16-bit content * of the corresponding 32-bit elements of Rs2 and writes the most significant 32-bit multiplication * results to the corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the * most significant 32-bit of the 48-bit multiplication results by adding a 1 to bit 15 of the results. 
* * **Operations**:\n * ~~~ * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[0]; * if (`.u` form) { * Round[x][32:0] = Mres[x][47:15] + 1; * Rd.W[x] = Round[x][32:1]; * } else { * Rd.W[x] = Mres[x][47:16]; * } * for RV32: x=0 * for RV64: x=1...0 * ~~~ * * \param [in] a long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_SMMWB_U(long a, unsigned long b) { long result; __ASM volatile("smmwb.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.118.2. SMMWB.u ===== */ /* ===== Inline Function Start for 3.119.1. SMMWT ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC * \brief SMMWT (SIMD MSW Signed Multiply Word and Top Half) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * SMMWT Rd, Rs1, Rs2 * SMMWT.u Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the signed 32-bit integer elements of one register and the top 16-bit of the * corresponding 32-bit elements of another register, and write the most significant 32-bit results to * the corresponding 32-bit elements of a register. The `.u` form rounds up the results from the most * significant discarded bit. * * **Description**:\n * This instruction multiplies the signed 32-bit elements of Rs1 with the top signed 16-bit content of * the corresponding 32-bit elements of Rs2 and writes the most significant 32-bit multiplication * results to the corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the * most significant 32-bit of the 48-bit multiplication results by adding a 1 to bit 15 of the results. 
* * **Operations**:\n * ~~~ * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[1]; * if (`.u` form) { * Round[x][32:0] = Mres[x][47:15] + 1; * Rd.W[x] = Round[x][32:1]; * } else { * Rd.W[x] = Mres[x][47:16]; * } * for RV32: x=0 * for RV64: x=1...0 * ~~~ * * \param [in] a long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_SMMWT(long a, unsigned long b) { long result; __ASM volatile("smmwt %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.119.1. SMMWT ===== */ /* ===== Inline Function Start for 3.119.2. SMMWT.u ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC * \brief SMMWT.u (SIMD MSW Signed Multiply Word and Top Half with Rounding) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * SMMWT Rd, Rs1, Rs2 * SMMWT.u Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the signed 32-bit integer elements of one register and the top 16-bit of the * corresponding 32-bit elements of another register, and write the most significant 32-bit results to * the corresponding 32-bit elements of a register. The `.u` form rounds up the results from the most * significant discarded bit. * * **Description**:\n * This instruction multiplies the signed 32-bit elements of Rs1 with the top signed 16-bit content of * the corresponding 32-bit elements of Rs2 and writes the most significant 32-bit multiplication * results to the corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the * most significant 32-bit of the 48-bit multiplication results by adding a 1 to bit 15 of the results. 
* * **Operations**:\n * ~~~ * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[1]; * if (`.u` form) { * Round[x][32:0] = Mres[x][47:15] + 1; * Rd.W[x] = Round[x][32:1]; * } else { * Rd.W[x] = Mres[x][47:16]; * } * for RV32: x=0 * for RV64: x=1...0 * ~~~ * * \param [in] a long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_SMMWT_U(long a, unsigned long b) { long result; __ASM volatile("smmwt.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.119.2. SMMWT.u ===== */ /* ===== Inline Function Start for 3.120.1. SMSLDA ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB * \brief SMSLDA (Signed Multiply Two Halfs & Add & Subtract 64-bit) * \details * **Type**: DSP (64-bit Profile) * * **Syntax**:\n * ~~~ * SMSLDA Rd, Rs1, Rs2 * SMSLXDA Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then * subtracts the two 32-bit results from the 64-bit value of an even/odd pair of registers (RV32) or a * register (RV64). The subtraction result is written back to the register-pair. * * SMSLDA: rd pair - top*top - bottom*bottom (all 32-bit elements) * * SMSLXDA: rd pair - top*bottom - bottom*top (all 32-bit elements) * * **RV32 Description**:\n * For the `SMSLDA` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit * content Rs2 and multiplies the top 16-bit content of Rs1 with the top 16-bit content of Rs2. * For the `SMSLXDA` instruction, it multiplies the top 16-bit content of Rs1 with the bottom 16-bit * content of Rs2 and multiplies the bottom 16-bit content of Rs1 with the top 16-bit content of Rs2. * The two multiplication results are subtracted from the 64-bit value of an even/odd pair of registers * specified by Rd(4,1). The 64-bit subtraction result is written back to the register-pair. 
The 16-bit * values of Rs1 and Rs2, and the 64-bit value of the register-pair are treated as signed integers. * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair * includes register 2d and 2d+1. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register * of the pair contains the low 32-bit of the result. * * **RV64 Description**:\n * For the `SMSLDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 * with the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2. * For the `SMSLXDA` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with * the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the bottom 16-bit content of * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2. * The four multiplication results are subtracted from the 64-bit value of Rd. The 64-bit subtraction * result is written back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated * as signed integers. 
* * **Operations**:\n * ~~~ * * RV32: * // SMSLDA * Mres0[31:0] = (Rs1.H[0] * Rs2.H[0]); * Mres1[31:0] = (Rs1.H[1] * Rs2.H[1]); * // SMSLXDA * Mres0[31:0] = (Rs1.H[0] * Rs2.H[1]); * Mres1[31:0] = (Rs1.H[1] * Rs2.H[0]); * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1); * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] - SE64(Mres0[31:0]) - SE64(Mres1[31:0]); * * RV64: * // SMSLDA * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]); * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]); * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]); * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]); * // SMSLXDA * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[1]); * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]); * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[1]); * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]); * Rd = Rd - SE64(Mres0[0][31:0]) - SE64(Mres1[0][31:0]) - SE64(Mres0[1][31:0]) - * SE64(Mres1[1][31:0]); * ~~~ * * \param [in] t long long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_SMSLDA(long long t, unsigned long a, unsigned long b) { __ASM volatile("smslda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 3.120.1. SMSLDA ===== */ /* ===== Inline Function Start for 3.120.2. SMSLXDA ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB * \brief SMSLXDA (Signed Crossed Multiply Two Halfs & Add & Subtract 64- bit) * \details * **Type**: DSP (64-bit Profile) * * **Syntax**:\n * ~~~ * SMSLDA Rd, Rs1, Rs2 * SMSLXDA Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then * subtracts the two 32-bit results from the 64-bit value of an even/odd pair of registers (RV32) or a * register (RV64). The subtraction result is written back to the register-pair. 
* * SMSLDA: rd pair - top*top - bottom*bottom (all 32-bit elements) * * SMSLXDA: rd pair - top*bottom - bottom*top (all 32-bit elements) * * **RV32 Description**:\n * For the `SMSLDA` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit * content Rs2 and multiplies the top 16-bit content of Rs1 with the top 16-bit content of Rs2. * For the `SMSLXDA` instruction, it multiplies the top 16-bit content of Rs1 with the bottom 16-bit * content of Rs2 and multiplies the bottom 16-bit content of Rs1 with the top 16-bit content of Rs2. * The two multiplication results are subtracted from the 64-bit value of an even/odd pair of registers * specified by Rd(4,1). The 64-bit subtraction result is written back to the register-pair. The 16-bit * values of Rs1 and Rs2, and the 64-bit value of the register-pair are treated as signed integers. * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair * includes register 2d and 2d+1. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register * of the pair contains the low 32-bit of the result. * * **RV64 Description**:\n * For the `SMSLDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 * with the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2. * For the `SMSLXDA` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with * the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the bottom 16-bit content of * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2. * The four multiplication results are subtracted from the 64-bit value of Rd. The 64-bit subtraction * result is written back to Rd. 
The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated * as signed integers. * * **Operations**:\n * ~~~ * * RV32: * // SMSLDA * Mres0[31:0] = (Rs1.H[0] * Rs2.H[0]); * Mres1[31:0] = (Rs1.H[1] * Rs2.H[1]); * // SMSLXDA * Mres0[31:0] = (Rs1.H[0] * Rs2.H[1]); * Mres1[31:0] = (Rs1.H[1] * Rs2.H[0]); * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1); * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] - SE64(Mres0[31:0]) - SE64(Mres1[31:0]); * * RV64: * // SMSLDA * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]); * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]); * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]); * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]); * // SMSLXDA * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[1]); * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]); * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[1]); * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]); * Rd = Rd - SE64(Mres0[0][31:0]) - SE64(Mres1[0][31:0]) - SE64(Mres0[1][31:0]) - * SE64(Mres1[1][31:0]); * ~~~ * * \param [in] t long long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_SMSLXDA(long long t, unsigned long a, unsigned long b) { __ASM volatile("smslxda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 3.120.2. SMSLXDA ===== */ /* ===== Inline Function Start for 3.121. SMSR64 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB * \brief SMSR64 (Signed Multiply and Subtract from 64- Bit Data) * \details * **Type**: DSP (64-bit Profile) * * **Syntax**:\n * ~~~ * SMSR64 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the 32-bit signed elements in two registers and subtract the 64-bit multiplication * results from the 64-bit signed data of a pair of registers (RV32) or a register (RV64). 
The result is * written back to the pair of registers (RV32) or a register (RV64). * * **RV32 Description**:\n * This instruction multiplies the 32-bit signed data of Rs1 with that of Rs2. It * subtracts the 64-bit multiplication result from the 64-bit signed data of an even/odd pair of registers * specified by Rd(4,1). The subtraction result is written back to the even/odd pair of registers * specified by Rd(4,1). * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair * includes register 2d and 2d+1. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register * of the pair contains the low 32-bit of the result. * * **RV64 Description**:\n * This instruction multiplies the 32-bit signed elements of Rs1 with that of Rs2. It * subtracts the 64-bit multiplication results from the 64-bit signed data of Rd. The subtraction result is * written back to Rd. * * **Operations**:\n * ~~~ * * RV32: * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1); * R[t_H].R[t_L] = R[t_H].R[t_L] - (Rs1 * Rs2); * * RV64: * Rd = Rd - (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); * ~~~ * * \param [in] t long long type of value stored in t * \param [in] a long type of value stored in a * \param [in] b long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_SMSR64(long long t, long a, long b) { __ASM volatile("smsr64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 3.121. SMSR64 ===== */ /* ===== Inline Function Start for 3.122.1. SMUL8 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY * \brief SMUL8 (SIMD Signed 8-bit Multiply) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * SMUL8 Rd, Rs1, Rs2 * SMULX8 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do signed 8-bit multiplications and generate four 16-bit results simultaneously. 
* * **RV32 Description**:\n * For the `SMUL8` instruction, multiply the 8-bit data elements of Rs1 with the * corresponding 8-bit data elements of Rs2. * For the `SMULX8` instruction, multiply the first and second 8-bit data elements of Rs1 with the * second and first 8-bit data elements of Rs2. At the same time, multiply the third and fourth 8-bit data * elements of Rs1 with the fourth and third 8-bit data elements of Rs2. * The four 16-bit results are then written into an even/odd pair of registers specified by Rd(4,1). * Rd(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair * includes register 2d and 2d+1. * The odd `2d+1` register of the pair contains the two 16-bit results calculated from the top part of * Rs1 and the even `2d` register of the pair contains the two 16-bit results calculated from the bottom * part of Rs1. * * **RV64 Description**:\n * For the `SMUL8` instruction, multiply the 8-bit data elements of Rs1 with the * corresponding 8-bit data elements of Rs2. * For the `SMULX8` instruction, multiply the first and second 8-bit data elements of Rs1 with the * second and first 8-bit data elements of Rs2. At the same time, multiply the third and fourth 8-bit data * elements of Rs1 with the fourth and third 8-bit data elements of Rs2. * The four 16-bit results are then written into Rd. The Rd.W[1] contains the two 16-bit results * calculated from the top part of Rs1 and the Rd.W[0] contains the two 16-bit results calculated from * the bottom part of Rs1. 
* * **Operations**:\n * ~~~ * * RV32: * if (is `SMUL8`) { * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom * } else if (is `SMULX8`) { * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom * } * rest[x/2] = op1t[x/2] s* op2t[x/2]; * resb[x/2] = op1b[x/2] s* op2b[x/2]; * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1); * R[t_H].H[1] = rest[1]; R[t_H].H[0] = resb[1]; * R[t_L].H[1] = rest[0]; R[t_L].H[0] = resb[0]; * x = 0 and 2 * * RV64: * if (is `SMUL8`) { * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom * } else if (is `SMULX8`) { * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom * } * rest[x/2] = op1t[x/2] s* op2t[x/2]; * resb[x/2] = op1b[x/2] s* op2b[x/2]; * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1); * Rd.W[1].H[1] = rest[1]; Rd.W[1].H[0] = resb[1]; * Rd.W[0].H[1] = rest[0]; Rd.W[0].H[0] = resb[0]; * x = 0 and 2 * ~~~ * * \param [in] a unsigned int type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_SMUL8(unsigned int a, unsigned int b) { unsigned long long result; __ASM volatile("smul8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.122.1. SMUL8 ===== */ /* ===== Inline Function Start for 3.122.2. SMULX8 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY * \brief SMULX8 (SIMD Signed Crossed 8-bit Multiply) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * SMUL8 Rd, Rs1, Rs2 * SMULX8 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do signed 8-bit multiplications and generate four 16-bit results simultaneously. 
* * **RV32 Description**:\n * For the `SMUL8` instruction, multiply the 8-bit data elements of Rs1 with the * corresponding 8-bit data elements of Rs2. * For the `SMULX8` instruction, multiply the first and second 8-bit data elements of Rs1 with the * second and first 8-bit data elements of Rs2. At the same time, multiply the third and fourth 8-bit data * elements of Rs1 with the fourth and third 8-bit data elements of Rs2. * The four 16-bit results are then written into an even/odd pair of registers specified by Rd(4,1). * Rd(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair * includes register 2d and 2d+1. * The odd `2d+1` register of the pair contains the two 16-bit results calculated from the top part of * Rs1 and the even `2d` register of the pair contains the two 16-bit results calculated from the bottom * part of Rs1. * * **RV64 Description**:\n * For the `SMUL8` instruction, multiply the 8-bit data elements of Rs1 with the * corresponding 8-bit data elements of Rs2. * For the `SMULX8` instruction, multiply the first and second 8-bit data elements of Rs1 with the * second and first 8-bit data elements of Rs2. At the same time, multiply the third and fourth 8-bit data * elements of Rs1 with the fourth and third 8-bit data elements of Rs2. * The four 16-bit results are then written into Rd. The Rd.W[1] contains the two 16-bit results * calculated from the top part of Rs1 and the Rd.W[0] contains the two 16-bit results calculated from * the bottom part of Rs1. 
* * **Operations**:\n * ~~~ * * RV32: * if (is `SMUL8`) { * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom * } else if (is `SMULX8`) { * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom * } * rest[x/2] = op1t[x/2] s* op2t[x/2]; * resb[x/2] = op1b[x/2] s* op2b[x/2]; * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1); * R[t_H].H[1] = rest[1]; R[t_H].H[0] = resb[1]; * R[t_L].H[1] = rest[0]; R[t_L].H[0] = resb[0]; * x = 0 and 2 * * RV64: * if (is `SMUL8`) { * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom * } else if (is `SMULX8`) { * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom * } * rest[x/2] = op1t[x/2] s* op2t[x/2]; * resb[x/2] = op1b[x/2] s* op2b[x/2]; * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1); * Rd.W[1].H[1] = rest[1]; Rd.W[1].H[0] = resb[1]; * Rd.W[0].H[1] = rest[0]; Rd.W[0].H[0] = resb[0]; * x = 0 and 2 * ~~~ * * \param [in] a unsigned int type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_SMULX8(unsigned int a, unsigned int b) { unsigned long long result; __ASM volatile("smulx8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.122.2. SMULX8 ===== */ /* ===== Inline Function Start for 3.123.1. SMUL16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY * \brief SMUL16 (SIMD Signed 16-bit Multiply) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * SMUL16 Rd, Rs1, Rs2 * SMULX16 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do signed 16-bit multiplications and generate two 32-bit results simultaneously. 
* * **RV32 Description**:\n * For the `SMUL16` instruction, multiply the top 16-bit Q15 content of Rs1 with * the top 16-bit Q15 content of Rs2. At the same time, multiply the bottom 16-bit Q15 content of Rs1 * with the bottom 16-bit Q15 content of Rs2. * For the `SMULX16` instruction, multiply the top 16-bit Q15 content of Rs1 with the bottom 16-bit * Q15 content of Rs2. At the same time, multiply the bottom 16-bit Q15 content of Rs1 with the top 16- * bit Q15 content of Rs2. * The two Q30 results are then written into an even/odd pair of registers specified by Rd(4,1). Rd(4,1), * i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair includes * register 2d and 2d+1. * The odd `2d+1` register of the pair contains the 32-bit result calculated from the top part of Rs1 and * the even `2d` register of the pair contains the 32-bit result calculated from the bottom part of Rs1. * * **RV64 Description**:\n * For the `SMUL16` instruction, multiply the top 16-bit Q15 content of the lower * 32-bit word in Rs1 with the top 16-bit Q15 content of the lower 32-bit word in Rs2. At the same time, * multiply the bottom 16-bit Q15 content of the lower 32-bit word in Rs1 with the bottom 16-bit Q15 * content of the lower 32-bit word in Rs2. * For the `SMULX16` instruction, multiply the top 16-bit Q15 content of the lower 32-bit word in Rs1 * with the bottom 16-bit Q15 content of the lower 32-bit word in Rs2. At the same time, multiply the * bottom 16-bit Q15 content of the lower 32-bit word in Rs1 with the top 16-bit Q15 content of the * lower 32-bit word in Rs2. * The two 32-bit Q30 results are then written into Rd. The result calculated from the top 16-bit of the * lower 32-bit word in Rs1 is written to Rd.W[1]. 
And the result calculated from the bottom 16-bit of * the lower 32-bit word in Rs1 is written to Rd.W[0] * * **Operations**:\n * ~~~ * * RV32: * if (is `SMUL16`) { * op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top * op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom * } else if (is `SMULX16`) { * op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top * op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom * } * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) { * res = aop s* bop; * } * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1); * R[t_H] = rest; * R[t_L] = resb; * * RV64: * if (is `SMUL16`) { * op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top * op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom * } else if (is `SMULX16`) { * op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top * op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom * } * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) { * res = aop s* bop; * } * Rd.W[1] = rest; * Rd.W[0] = resb; * ~~~ * * \param [in] a unsigned int type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_SMUL16(unsigned int a, unsigned int b) { unsigned long long result; __ASM volatile("smul16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.123.1. SMUL16 ===== */ /* ===== Inline Function Start for 3.123.2. SMULX16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY * \brief SMULX16 (SIMD Signed Crossed 16-bit Multiply) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * SMUL16 Rd, Rs1, Rs2 * SMULX16 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do signed 16-bit multiplications and generate two 32-bit results simultaneously. * * **RV32 Description**:\n * For the `SMUL16` instruction, multiply the top 16-bit Q15 content of Rs1 with * the top 16-bit Q15 content of Rs2. At the same time, multiply the bottom 16-bit Q15 content of Rs1 * with the bottom 16-bit Q15 content of Rs2. 
* For the `SMULX16` instruction, multiply the top 16-bit Q15 content of Rs1 with the bottom 16-bit * Q15 content of Rs2. At the same time, multiply the bottom 16-bit Q15 content of Rs1 with the top 16- * bit Q15 content of Rs2. * The two Q30 results are then written into an even/odd pair of registers specified by Rd(4,1). Rd(4,1), * i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair includes * register 2d and 2d+1. * The odd `2d+1` register of the pair contains the 32-bit result calculated from the top part of Rs1 and * the even `2d` register of the pair contains the 32-bit result calculated from the bottom part of Rs1. * * **RV64 Description**:\n * For the `SMUL16` instruction, multiply the top 16-bit Q15 content of the lower * 32-bit word in Rs1 with the top 16-bit Q15 content of the lower 32-bit word in Rs2. At the same time, * multiply the bottom 16-bit Q15 content of the lower 32-bit word in Rs1 with the bottom 16-bit Q15 * content of the lower 32-bit word in Rs2. * For the `SMULX16` instruction, multiply the top 16-bit Q15 content of the lower 32-bit word in Rs1 * with the bottom 16-bit Q15 content of the lower 32-bit word in Rs2. At the same time, multiply the * bottom 16-bit Q15 content of the lower 32-bit word in Rs1 with the top 16-bit Q15 content of the * lower 32-bit word in Rs2. * The two 32-bit Q30 results are then written into Rd. The result calculated from the top 16-bit of the * lower 32-bit word in Rs1 is written to Rd.W[1]. 
And the result calculated from the bottom 16-bit of * the lower 32-bit word in Rs1 is written to Rd.W[0] * * **Operations**:\n * ~~~ * * RV32: * if (is `SMUL16`) { * op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top * op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom * } else if (is `SMULX16`) { * op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top * op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom * } * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) { * res = aop s* bop; * } * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1); * R[t_H] = rest; * R[t_L] = resb; * * RV64: * if (is `SMUL16`) { * op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top * op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom * } else if (is `SMULX16`) { * op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top * op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom * } * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) { * res = aop s* bop; * } * Rd.W[1] = rest; * Rd.W[0] = resb; * ~~~ * * \param [in] a unsigned int type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_SMULX16(unsigned int a, unsigned int b) { unsigned long long result; __ASM volatile("smulx16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.123.2. SMULX16 ===== */ /* ===== Inline Function Start for 3.124. SRA.u ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC * \brief SRA.u (Rounding Shift Right Arithmetic) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * SRA.u Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Perform an arithmetic right shift operation with rounding. The shift amount is a variable * from a GPR. * * **Description**:\n * This instruction right-shifts the content of Rs1 arithmetically. The shifted out bits are * filled with the sign-bit and the shift amount is specified by the low-order 5-bits (RV32) or 6-bits * (RV64) of the Rs2 register. 
For the rounding operation, a value of 1 is added to the most significant * discarded bit of the data to calculate the final result. And the result is written to Rd. * * **Operations**:\n * ~~~ * * RV32: * sa = Rs2[4:0]; * if (sa > 0) { * res[31:-1] = SE33(Rs1[31:(sa-1)]) + 1; * Rd = res[31:0]; * } else { * Rd = Rs1; * } * * RV64: * sa = Rs2[5:0]; * if (sa > 0) { * res[63:-1] = SE65(Rs1[63:(sa-1)]) + 1; * Rd = res[63:0]; * } else { * Rd = Rs1; * } * ~~~ * * \param [in] a long type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_SRA_U(long a, unsigned int b) { long result; __ASM volatile("sra.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.124. SRA.u ===== */ /* ===== Inline Function Start for 3.125. SRAI.u ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC * \brief SRAI.u (Rounding Shift Right Arithmetic Immediate) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * SRAI.u Rd, Rs1, imm6u[4:0] (RV32) * SRAI.u Rd, Rs1, imm6u[5:0] (RV64) * ~~~ * * **Purpose**:\n * Perform an arithmetic right shift operation with rounding. The shift amount is an * immediate value. * * **Description**:\n * This instruction right-shifts the content of Rs1 arithmetically. The shifted out bits are * filled with the sign-bit and the shift amount is specified by the imm6u[4:0] (RV32) or imm6u[5:0] * (RV64) constant . For the rounding operation, a value of 1 is added to the most significant discarded * bit of the data to calculate the final result. And the result is written to Rd. 
* * **Operations**:\n * ~~~ * * RV32: * sa = imm6u[4:0]; * if (sa > 0) { * res[31:-1] = SE33(Rs1[31:(sa-1)]) + 1; * Rd = res[31:0]; * } else { * Rd = Rs1; * } * * RV64: * sa = imm6u[5:0]; * if (sa > 0) { * res[63:-1] = SE65(Rs1[63:(sa-1)]) + 1; * Rd = res[63:0]; * } else { * Rd = Rs1; * } * ~~~ * * \param [in] a long type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in long type */ #define __RV_SRAI_U(a, b) \ ({ \ long result; \ long __a = (long)(a); \ __ASM volatile("srai.u %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ result; \ }) /* ===== Inline Function End for 3.125. SRAI.u ===== */ /* ===== Inline Function Start for 3.126.1. SRA8 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT * \brief SRA8 (SIMD 8-bit Shift Right Arithmetic) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * SRA8 Rd, Rs1, Rs2 * SRA8.u Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 8-bit element arithmetic right shift operations simultaneously. The shift amount is a * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted * results. * * **Description**:\n * The 8-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order * 3-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is * added to the most significant discarded bit of each 8-bit data element to calculate the final results. * And the results are written to Rd. 
* * **Operations**:\n * ~~~ * sa = Rs2[2:0]; * if (sa > 0) { * if (`.u` form) { // SRA8.u * res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1; * Rd.B[x] = res[7:0]; * } else { // SRA8 * Rd.B[x] = SE8(Rd.B[x][7:sa]) * } * } else { * Rd = Rs1; * } * for RV32: x=3...0, * for RV64: x=7...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_SRA8(unsigned long a, unsigned int b) { unsigned long result; __ASM volatile("sra8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.126.1. SRA8 ===== */ /* ===== Inline Function Start for 3.126.2. SRA8.u ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT * \brief SRA8.u (SIMD 8-bit Rounding Shift Right Arithmetic) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * SRA8 Rd, Rs1, Rs2 * SRA8.u Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 8-bit element arithmetic right shift operations simultaneously. The shift amount is a * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted * results. * * **Description**:\n * The 8-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order * 3-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is * added to the most significant discarded bit of each 8-bit data element to calculate the final results. * And the results are written to Rd. 
* * **Operations**:\n * ~~~ * sa = Rs2[2:0]; * if (sa > 0) { * if (`.u` form) { // SRA8.u * res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1; * Rd.B[x] = res[7:0]; * } else { // SRA8 * Rd.B[x] = SE8(Rd.B[x][7:sa]) * } * } else { * Rd = Rs1; * } * for RV32: x=3...0, * for RV64: x=7...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_SRA8_U(unsigned long a, unsigned int b) { unsigned long result; __ASM volatile("sra8.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.126.2. SRA8.u ===== */ /* ===== Inline Function Start for 3.127.1. SRAI8 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT * \brief SRAI8 (SIMD 8-bit Shift Right Arithmetic Immediate) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * SRAI8 Rd, Rs1, imm3u * SRAI8.u Rd, Rs1, imm3u * ~~~ * * **Purpose**:\n * Do 8-bit element arithmetic right shift operations simultaneously. The shift amount is an * immediate value. The `.u` form performs additional rounding up operations on the shifted results. * * **Description**:\n * The 8-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out * bits are filled with the sign-bit of the data elements. The shift amount is specified by the imm3u * constant. For the rounding operation of the `.u` form, a value of 1 is added to the most significant * discarded bit of each 8-bit data element to calculate the final results. And the results are written to * Rd. 
* * **Operations**:\n * ~~~ * sa = imm3u[2:0]; * if (sa > 0) { * if (`.u` form) { // SRA8.u * res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1; * Rd.B[x] = res[7:0]; * } else { // SRA8 * Rd.B[x] = SE8(Rd.B[x][7:sa]) * } * } else { * Rd = Rs1; * } * for RV32: x=3...0, * for RV64: x=7...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long type */ #define __RV_SRAI8(a, b) \ ({ \ unsigned long result; \ unsigned long __a = (unsigned long)(a); \ __ASM volatile("srai8 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ result; \ }) /* ===== Inline Function End for 3.127.1. SRAI8 ===== */ /* ===== Inline Function Start for 3.127.2. SRAI8.u ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT * \brief SRAI8.u (SIMD 8-bit Rounding Shift Right Arithmetic Immediate) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * SRAI8 Rd, Rs1, imm3u * SRAI8.u Rd, Rs1, imm3u * ~~~ * * **Purpose**:\n * Do 8-bit element arithmetic right shift operations simultaneously. The shift amount is an * immediate value. The `.u` form performs additional rounding up operations on the shifted results. * * **Description**:\n * The 8-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out * bits are filled with the sign-bit of the data elements. The shift amount is specified by the imm3u * constant. For the rounding operation of the `.u` form, a value of 1 is added to the most significant * discarded bit of each 8-bit data element to calculate the final results. And the results are written to * Rd. 
* * **Operations**:\n * ~~~ * sa = imm3u[2:0]; * if (sa > 0) { * if (`.u` form) { // SRA8.u * res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1; * Rd.B[x] = res[7:0]; * } else { // SRA8 * Rd.B[x] = SE8(Rd.B[x][7:sa]) * } * } else { * Rd = Rs1; * } * for RV32: x=3...0, * for RV64: x=7...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long type */ #define __RV_SRAI8_U(a, b) \ ({ \ unsigned long result; \ unsigned long __a = (unsigned long)(a); \ __ASM volatile("srai8.u %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ result; \ }) /* ===== Inline Function End for 3.127.2. SRAI8.u ===== */ /* ===== Inline Function Start for 3.128.1. SRA16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT * \brief SRA16 (SIMD 16-bit Shift Right Arithmetic) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * SRA16 Rd, Rs1, Rs2 * SRA16.u Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 16-bit element arithmetic right shift operations simultaneously. The shift amount is a * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted * results. * * **Description**:\n * The 16-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order * 4-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is * added to the most significant discarded bit of each 16-bit data element to calculate the final results. * And the results are written to Rd. 
* * **Operations**:\n * ~~~ * sa = Rs2[3:0]; * if (sa != 0) { * if (`.u` form) { // SRA16.u * res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1; * Rd.H[x] = res[15:0]; * } else { // SRA16 * Rd.H[x] = SE16(Rs1.H[x][15:sa]) * } * } else { * Rd = Rs1; * } * for RV32: x=1...0, * for RV64: x=3...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_SRA16(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("sra16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.128.1. SRA16 ===== */ /* ===== Inline Function Start for 3.128.2. SRA16.u ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT * \brief SRA16.u (SIMD 16-bit Rounding Shift Right Arithmetic) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * SRA16 Rd, Rs1, Rs2 * SRA16.u Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 16-bit element arithmetic right shift operations simultaneously. The shift amount is a * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted * results. * * **Description**:\n * The 16-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order * 4-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is * added to the most significant discarded bit of each 16-bit data element to calculate the final results. * And the results are written to Rd. 
* * **Operations**:\n * ~~~ * sa = Rs2[3:0]; * if (sa != 0) { * if (`.u` form) { // SRA16.u * res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1; * Rd.H[x] = res[15:0]; * } else { // SRA16 * Rd.H[x] = SE16(Rs1.H[x][15:sa]) * } * } else { * Rd = Rs1; * } * for RV32: x=1...0, * for RV64: x=3...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_SRA16_U(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("sra16.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.128.2. SRA16.u ===== */ /* ===== Inline Function Start for 3.129.1. SRAI16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT * \brief SRAI16 (SIMD 16-bit Shift Right Arithmetic Immediate) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * SRAI16 Rd, Rs1, imm4u * SRAI16.u Rd, Rs1, imm4u * ~~~ * * **Purpose**:\n * Do 16-bit elements arithmetic right shift operations simultaneously. The shift amount is * an immediate value. The `.u` form performs additional rounding up operations on the shifted * results. * * **Description**:\n * The 16-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out * bits are filled with the sign-bit of the 16-bit data elements. The shift amount is specified by the * imm4u constant. For the rounding operation of the `.u` form, a value of 1 is added to the most * significant discarded bit of each 16-bit data to calculate the final results. And the results are written * to Rd. 
* * **Operations**:\n * ~~~ * sa = imm4u[3:0]; * if (sa > 0) { * if (`.u` form) { // SRAI16.u * res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1; * Rd.H[x] = res[15:0]; * } else { // SRAI16 * Rd.H[x] = SE16(Rs1.H[x][15:sa]); * } * } else { * Rd = Rs1; * } * for RV32: x=1...0, * for RV64: x=3...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ #define __RV_SRAI16(a, b) \ ({ \ unsigned long result; \ unsigned long __a = (unsigned long)(a); \ __ASM volatile("srai16 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ result; \ }) /* ===== Inline Function End for 3.129.1. SRAI16 ===== */ /* ===== Inline Function Start for 3.129.2. SRAI16.u ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT * \brief SRAI16.u (SIMD 16-bit Rounding Shift Right Arithmetic Immediate) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * SRAI16 Rd, Rs1, imm4u * SRAI16.u Rd, Rs1, imm4u * ~~~ * * **Purpose**:\n * Do 16-bit elements arithmetic right shift operations simultaneously. The shift amount is * an immediate value. The `.u` form performs additional rounding up operations on the shifted * results. * * **Description**:\n * The 16-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out * bits are filled with the sign-bit of the 16-bit data elements. The shift amount is specified by the * imm4u constant. For the rounding operation of the `.u` form, a value of 1 is added to the most * significant discarded bit of each 16-bit data to calculate the final results. And the results are written * to Rd. 
* * **Operations**:\n * ~~~ * sa = imm4u[3:0]; * if (sa > 0) { * if (`.u` form) { // SRAI16.u * res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1; * Rd.H[x] = res[15:0]; * } else { // SRAI16 * Rd.H[x] = SE16(Rs1.H[x][15:sa]); * } * } else { * Rd = Rs1; * } * for RV32: x=1...0, * for RV64: x=3...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ #define __RV_SRAI16_U(a, b) \ ({ \ unsigned long result; \ unsigned long __a = (unsigned long)(a); \ __ASM volatile("srai16.u %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ result; \ }) /* ===== Inline Function End for 3.129.2. SRAI16.u ===== */ /* ===== Inline Function Start for 3.130.1. SRL8 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT * \brief SRL8 (SIMD 8-bit Shift Right Logical) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * SRL8 Rt, Ra, Rb * SRL8.u Rt, Ra, Rb * ~~~ * * **Purpose**:\n * Do 8-bit elements logical right shift operations simultaneously. The shift amount is a * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted * results. * * **Description**:\n * The 8-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits are * filled with zero. The shift amount is specified by the low-order 3-bits of the value in the Rs2 register. * For the rounding operation of the `.u` form, a value of 1 is added to the most significant discarded * bit of each 8-bit data element to calculate the final results. And the results are written to Rd. 
* * **Operations**:\n * ~~~ * sa = Rs2[2:0]; * if (sa > 0) { * if (`.u` form) { // SRL8.u * res[8:0] = ZE9(Rs1.B[x][7:sa-1]) + 1; * Rd.B[x] = res[8:1]; * } else { // SRL8 * Rd.B[x] = ZE8(Rs1.B[x][7:sa]); * } * } else { * Rd = Rs1; * } * for RV32: x=3...0, * for RV64: x=7...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_SRL8(unsigned long a, unsigned int b) { unsigned long result; __ASM volatile("srl8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.130.1. SRL8 ===== */ /* ===== Inline Function Start for 3.130.2. SRL8.u ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT * \brief SRL8.u (SIMD 8-bit Rounding Shift Right Logical) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * SRL8 Rt, Ra, Rb * SRL8.u Rt, Ra, Rb * ~~~ * * **Purpose**:\n * Do 8-bit elements logical right shift operations simultaneously. The shift amount is a * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted * results. * * **Description**:\n * The 8-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits are * filled with zero. The shift amount is specified by the low-order 3-bits of the value in the Rs2 register. * For the rounding operation of the `.u` form, a value of 1 is added to the most significant discarded * bit of each 8-bit data element to calculate the final results. And the results are written to Rd. 
* * **Operations**:\n * ~~~ * sa = Rs2[2:0]; * if (sa > 0) { * if (`.u` form) { // SRL8.u * res[8:0] = ZE9(Rs1.B[x][7:sa-1]) + 1; * Rd.B[x] = res[8:1]; * } else { // SRL8 * Rd.B[x] = ZE8(Rs1.B[x][7:sa]); * } * } else { * Rd = Rs1; * } * for RV32: x=3...0, * for RV64: x=7...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_SRL8_U(unsigned long a, unsigned int b) { unsigned long result; __ASM volatile("srl8.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.130.2. SRL8.u ===== */ /* ===== Inline Function Start for 3.131.1. SRLI8 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT * \brief SRLI8 (SIMD 8-bit Shift Right Logical Immediate) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * SRLI8 Rt, Ra, imm3u * SRLI8.u Rt, Ra, imm3u * ~~~ * * **Purpose**:\n * Do 8-bit elements logical right shift operations simultaneously. The shift amount is an * immediate value. The `.u` form performs additional rounding up operations on the shifted results. * * **Description**:\n * The 8-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits are * filled with zero. The shift amount is specified by the imm3u constant. For the rounding operation of * the `.u` form, a value of 1 is added to the most significant discarded bit of each 8-bit data element to * calculate the final results. And the results are written to Rd. 
* * **Operations**:\n * ~~~ * sa = imm3u[2:0]; * if (sa > 0) { * if (`.u` form) { // SRLI8.u * res[8:0] = ZE9(Rs1.B[x][7:sa-1]) + 1; * Rd.B[x] = res[8:1]; * } else { // SRLI8 * Rd.B[x] = ZE8(Rs1.B[x][7:sa]); * } * } else { * Rd = Rs1; * } * for RV32: x=3...0, * for RV64: x=7...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long type */ #define __RV_SRLI8(a, b) \ ({ \ unsigned long result; \ unsigned long __a = (unsigned long)(a); \ __ASM volatile("srli8 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ result; \ }) /* ===== Inline Function End for 3.131.1. SRLI8 ===== */ /* ===== Inline Function Start for 3.131.2. SRLI8.u ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT * \brief SRLI8.u (SIMD 8-bit Rounding Shift Right Logical Immediate) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * SRLI8 Rt, Ra, imm3u * SRLI8.u Rt, Ra, imm3u * ~~~ * * **Purpose**:\n * Do 8-bit elements logical right shift operations simultaneously. The shift amount is an * immediate value. The `.u` form performs additional rounding up operations on the shifted results. * * **Description**:\n * The 8-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits are * filled with zero. The shift amount is specified by the imm3u constant. For the rounding operation of * the `.u` form, a value of 1 is added to the most significant discarded bit of each 8-bit data element to * calculate the final results. And the results are written to Rd. 
* * **Operations**:\n * ~~~ * sa = imm3u[2:0]; * if (sa > 0) { * if (`.u` form) { // SRLI8.u * res[8:0] = ZE9(Rs1.B[x][7:sa-1]) + 1; * Rd.B[x] = res[8:1]; * } else { // SRLI8 * Rd.B[x] = ZE8(Rs1.B[x][7:sa]); * } * } else { * Rd = Rs1; * } * for RV32: x=3...0, * for RV64: x=7...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long type */ #define __RV_SRLI8_U(a, b) \ ({ \ unsigned long result; \ unsigned long __a = (unsigned long)(a); \ __ASM volatile("srli8.u %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ result; \ }) /* ===== Inline Function End for 3.131.2. SRLI8.u ===== */ /* ===== Inline Function Start for 3.132.1. SRL16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT * \brief SRL16 (SIMD 16-bit Shift Right Logical) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * SRL16 Rt, Ra, Rb * SRL16.u Rt, Ra, Rb * ~~~ * * **Purpose**:\n * Do 16-bit elements logical right shift operations simultaneously. The shift amount is a variable from a GPR. The `.u` form performs additional rounding upoperations on the shifted results. * * **Description**:\n * The 16-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits * are filled with zero. The shift amount is specified by the low-order 4-bits of the value in the Rs2 * register. For the rounding operation of the `.u` form, a value of 1 is added to the most significant * discarded bit of each 16-bit data element to calculate the final results. And the results are written to * Rd. 
* * **Operations**:\n * ~~~ * sa = Rs2[3:0]; * if (sa > 0) { * if (`.u` form) { // SRL16.u * res[16:0] = ZE17(Rs1.H[x][15:sa-1]) + 1; * Rd.H[x] = res[16:1]; * } else { // SRL16 * Rd.H[x] = ZE16(Rs1.H[x][15:sa]); * } * } else { * Rd = Rs1; * } * for RV32: x=1...0, * for RV64: x=3...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_SRL16(unsigned long a, unsigned int b) { unsigned long result; __ASM volatile("srl16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.132.1. SRL16 ===== */ /* ===== Inline Function Start for 3.132.2. SRL16.u ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT * \brief SRL16.u (SIMD 16-bit Rounding Shift Right Logical) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * SRL16 Rt, Ra, Rb * SRL16.u Rt, Ra, Rb * ~~~ * * **Purpose**:\n * Do 16-bit elements logical right shift operations simultaneously. The shift amount is a variable from a GPR. The `.u` form performs additional rounding upoperations on the shifted results. * * **Description**:\n * The 16-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits * are filled with zero. The shift amount is specified by the low-order 4-bits of the value in the Rs2 * register. For the rounding operation of the `.u` form, a value of 1 is added to the most significant * discarded bit of each 16-bit data element to calculate the final results. And the results are written to * Rd. 
* * **Operations**:\n * ~~~ * sa = Rs2[3:0]; * if (sa > 0) { * if (`.u` form) { // SRL16.u * res[16:0] = ZE17(Rs1.H[x][15:sa-1]) + 1; * Rd.H[x] = res[16:1]; * } else { // SRL16 * Rd.H[x] = ZE16(Rs1.H[x][15:sa]); * } * } else { * Rd = Rs1; * } * for RV32: x=1...0, * for RV64: x=3...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_SRL16_U(unsigned long a, unsigned int b) { unsigned long result; __ASM volatile("srl16.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.132.2. SRL16.u ===== */ /* ===== Inline Function Start for 3.133.1. SRLI16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT * \brief SRLI16 (SIMD 16-bit Shift Right Logical Immediate) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * SRLI16 Rt, Ra, imm4u * SRLI16.u Rt, Ra, imm4u * ~~~ * * **Purpose**:\n * Do 16-bit elements logical right shift operations simultaneously. The shift amount is an * immediate value. The `.u` form performs additional rounding up operations on the shifted results. * * **Description**:\n * The 16-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits * are filled with zero. The shift amount is specified by the imm4u constant. For the rounding * operation of the `.u` form, a value of 1 is added to the most significant discarded bit of each 16-bit * data element to calculate the final results. And the results are written to Rd. 
*
 * **Operations**:\n
 * ~~~
 * sa = imm4u;
 * if (sa > 0) {
 *   if (`.u` form) { // SRLI16.u
 *     res[16:0] = ZE17(Rs1.H[x][15:sa-1]) + 1;
 *     Rd.H[x] = res[16:1];
 *   } else { // SRLI16
 *     Rd.H[x] = ZE16(Rs1.H[x][15:sa]);
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \note `b` is handed to the assembler through the immediate constraint `K`, so it
 *       has to be an integer constant expression (the imm4u shift amount).
 * \note The expansion is a statement expression that declares locals named `result`
 *       and `__a`; an argument that mentions an identifier with either of those names
 *       binds to these locals rather than to the caller's variable.
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
#define __RV_SRLI16(a, b) \
    ({ \
        unsigned long result; \
        unsigned long __a = (unsigned long)(a); \
        __ASM volatile("srli16 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \
        result; \
    })
/* ===== Inline Function End for 3.133.1. SRLI16 ===== */

/* ===== Inline Function Start for 3.133.2. SRLI16.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
 * \brief SRLI16.u (SIMD 16-bit Rounding Shift Right Logical Immediate)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SRLI16 Rt, Ra, imm4u
 * SRLI16.u Rt, Ra, imm4u
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit elements logical right shift operations simultaneously. The shift amount is an
 * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
 *
 * **Description**:\n
 * The 16-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
 * are filled with zero. The shift amount is specified by the imm4u constant. For the rounding
 * operation of the `.u` form, a value of 1 is added to the most significant discarded bit of each 16-bit
 * data element to calculate the final results. And the results are written to Rd.
*
 * **Operations**:\n
 * ~~~
 * sa = imm4u;
 * if (sa > 0) {
 *   if (`.u` form) { // SRLI16.u
 *     res[16:0] = ZE17(Rs1.H[x][15:sa-1]) + 1;
 *     Rd.H[x] = res[16:1];
 *   } else { // SRLI16
 *     Rd.H[x] = ZE16(Rs1.H[x][15:sa]);
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \note `b` is handed to the assembler through the immediate constraint `K`, so it
 *       has to be an integer constant expression (the imm4u shift amount).
 * \note The expansion is a statement expression that declares locals named `result`
 *       and `__a`; an argument that mentions an identifier with either of those names
 *       binds to these locals rather than to the caller's variable.
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
#define __RV_SRLI16_U(a, b) \
    ({ \
        unsigned long result; \
        unsigned long __a = (unsigned long)(a); \
        __ASM volatile("srli16.u %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \
        result; \
    })
/* ===== Inline Function End for 3.133.2. SRLI16.u ===== */

/* ===== Inline Function Start for 3.134. STAS16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief STAS16 (SIMD 16-bit Straight Addition & Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * STAS16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit integer element addition and 16-bit integer element subtraction in a 32-bit
 * chunk simultaneously. Operands are from corresponding positions in 32-bit chunks.
 *
 * **Description**:\n
 * This instruction adds the 16-bit integer element in [31:16] of 32-bit chunks in Rs1 with
 * the 16-bit integer element in [31:16] of 32-bit chunks in Rs2, and writes the result to [31:16] of 32-bit
 * chunks in Rd; at the same time, it subtracts the 16-bit integer element in [15:0] of 32-bit chunks in
 * Rs2 from the 16-bit integer element in [15:0] of 32-bit chunks in Rs1, and writes the result to [15:0]
 * of 32-bit chunks in Rd.
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned operations.
* * **Operations**:\n * ~~~ * Rd.W[x][31:16] = Rs1.W[x][31:16] + Rs2.W[x][31:16]; * Rd.W[x][15:0] = Rs1.W[x][15:0] - Rs2.W[x][15:0]; * for RV32, x=0 * for RV64, x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_STAS16(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("stas16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.134. STAS16 ===== */ /* ===== Inline Function Start for 3.135. STSA16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB * \brief STSA16 (SIMD 16-bit Straight Subtraction & Addition) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * STSA16 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 16-bit integer element subtraction and 16-bit integer element addition in a 32-bit * chunk simultaneously. Operands are from corresponding positions in 32-bit chunks. * * **Description**:\n * This instruction subtracts the 16-bit integer element in [31:16] of 32-bit chunks in Rs2 * from the 16-bit integer element in [31:16] of 32-bit chunks in Rs1, and writes the result to [31:16] of * 32-bit chunks in Rd; at the same time, it adds the 16-bit integer element in [15:0] of 32-bit chunks in * Rs2 with the 16-bit integer element in [15:0] of 32-bit chunks in Rs1, and writes the result to [15:0] of * 32-bit chunks in Rd. * * **Note**:\n * This instruction can be used for either signed or unsigned operations. 
* * **Operations**:\n * ~~~ * Rd.W[x][31:16] = Rs1.W[x][31:16] - Rs2.W[x][31:16]; * Rd.W[x][15:0] = Rs1.W[x][15:0] + Rs2.W[x][15:0]; * for RV32, x=0 * for RV64, x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_STSA16(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("stsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.135. STSA16 ===== */ /* ===== Inline Function Start for 3.136. SUB8 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB * \brief SUB8 (SIMD 8-bit Subtraction) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * SUB8 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 8-bit integer element subtractions simultaneously. * * **Description**:\n * This instruction subtracts the 8-bit integer elements in Rs2 from the 8-bit integer * elements in Rs1, and then writes the result to Rd. * * **Note**:\n * This instruction can be used for either signed or unsigned subtraction. * * **Operations**:\n * ~~~ * Rd.B[x] = Rs1.B[x] - Rs2.B[x]; * for RV32: x=3...0, * for RV64: x=7...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_SUB8(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("sub8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.136. SUB8 ===== */ /* ===== Inline Function Start for 3.137. SUB16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB * \brief SUB16 (SIMD 16-bit Subtraction) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * SUB16 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 16-bit integer element subtractions simultaneously. 
* * **Description**:\n * This instruction subtracts the 16-bit integer elements in Rs2 from the 16-bit integer * elements in Rs1, and then writes the result to Rd. * * **Note**:\n * This instruction can be used for either signed or unsigned subtraction. * * **Operations**:\n * ~~~ * Rd.H[x] = Rs1.H[x] - Rs2.H[x]; * for RV32: x=1...0, * for RV64: x=3...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_SUB16(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("sub16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.137. SUB16 ===== */ /* ===== Inline Function Start for 3.138. SUB64 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB * \brief SUB64 (64-bit Subtraction) * \details * **Type**: DSP (64-bit Profile) * * **Syntax**:\n * ~~~ * SUB64 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Perform a 64-bit signed or unsigned integer subtraction. * * **RV32 Description**:\n * This instruction subtracts the 64-bit integer of an even/odd pair of registers * specified by Rs2(4,1) from the 64-bit integer of an even/odd pair of registers specified by Rs1(4,1), * and then writes the 64-bit result to an even/odd pair of registers specified by Rd(4,1). * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair * includes register 2d and 2d+1. * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d` * register of the pair contains the low 32-bit of the operand. * * **RV64 Description**:\n * This instruction subtracts the 64-bit integer of Rs2 from the 64-bit integer of Rs1, * and then writes the 64-bit result to Rd. * * **Note**:\n * This instruction can be used for either signed or unsigned subtraction. 
* * **Operations**:\n * ~~~ * * RV32: * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1); * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1); * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1); * R[t_H].R[t_L] = R[a_H].R[a_L] - R[b_H].R[b_L]; * * RV64: * Rd = Rs1 - Rs2; * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_SUB64(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("sub64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.138. SUB64 ===== */ /* ===== Inline Function Start for 3.139.1. SUNPKD810 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK * \brief SUNPKD810 (Signed Unpacking Bytes 1 & 0) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * SUNPKD8xy Rd, Rs1 * xy = {10, 20, 30, 31, 32} * ~~~ * * **Purpose**:\n * Unpack byte *x and byte y* of 32-bit chunks in a register into two 16-bit signed halfwords * of 32-bit chunks in a register. * * **Description**:\n * For the `SUNPKD8(x)(*y*)` instruction, it unpacks byte *x and byte y* of 32-bit chunks in Rs1 into * two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit * chunks in Rd. * * **Operations**:\n * ~~~ * Rd.W[m].H[1] = SE16(Rs1.W[m].B[x]) * Rd.W[m].H[0] = SE16(Rs1.W[m].B[y]) * // SUNPKD810, x=1,y=0 * // SUNPKD820, x=2,y=0 * // SUNPKD830, x=3,y=0 * // SUNPKD831, x=3,y=1 * // SUNPKD832, x=3,y=2 * for RV32: m=0, * for RV64: m=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_SUNPKD810(unsigned long a) { unsigned long result; __ASM volatile("sunpkd810 %0, %1" : "=r"(result) : "r"(a)); return result; } /* ===== Inline Function End for 3.139.1. 
SUNPKD810 ===== */

/* ===== Inline Function Start for 3.139.2. SUNPKD820 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
 * \brief SUNPKD820 (Signed Unpacking Bytes 2 & 0)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * SUNPKD8xy Rd, Rs1
 * xy = {10, 20, 30, 31, 32}
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte *x and byte y* of 32-bit chunks in a register into two 16-bit signed halfwords
 * of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `SUNPKD8(x)(*y*)` instruction, it unpacks byte *x and byte y* of 32-bit chunks in Rs1 into
 * two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit
 * chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = SE16(Rs1.W[m].B[x])
 * Rd.W[m].H[0] = SE16(Rs1.W[m].B[y])
 * // SUNPKD810, x=1,y=0
 * // SUNPKD820, x=2,y=0
 * // SUNPKD830, x=3,y=0
 * // SUNPKD831, x=3,y=1
 * // SUNPKD832, x=3,y=2
 * for RV32: m=0,
 * for RV64: m=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SUNPKD820(unsigned long a)
{
    unsigned long rd;

    __ASM volatile("sunpkd820 %0, %1"
                   : "=r"(rd)
                   : "r"(a));
    return rd;
}
/* ===== Inline Function End for 3.139.2. SUNPKD820 ===== */

/* ===== Inline Function Start for 3.139.3. SUNPKD830 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
 * \brief SUNPKD830 (Signed Unpacking Bytes 3 & 0)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * SUNPKD8xy Rd, Rs1
 * xy = {10, 20, 30, 31, 32}
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte *x and byte y* of 32-bit chunks in a register into two 16-bit signed halfwords
 * of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `SUNPKD8(x)(*y*)` instruction, it unpacks byte *x and byte y* of 32-bit chunks in Rs1 into
 * two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit
 * chunks in Rd.
* * **Operations**:\n * ~~~ * Rd.W[m].H[1] = SE16(Rs1.W[m].B[x]) * Rd.W[m].H[0] = SE16(Rs1.W[m].B[y]) * // SUNPKD810, x=1,y=0 * // SUNPKD820, x=2,y=0 * // SUNPKD830, x=3,y=0 * // SUNPKD831, x=3,y=1 * // SUNPKD832, x=3,y=2 * for RV32: m=0, * for RV64: m=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_SUNPKD830(unsigned long a) { unsigned long result; __ASM volatile("sunpkd830 %0, %1" : "=r"(result) : "r"(a)); return result; } /* ===== Inline Function End for 3.139.3. SUNPKD830 ===== */ /* ===== Inline Function Start for 3.139.4. SUNPKD831 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK * \brief SUNPKD831 (Signed Unpacking Bytes 3 & 1) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * SUNPKD8xy Rd, Rs1 * xy = {10, 20, 30, 31, 32} * ~~~ * * **Purpose**:\n * Unpack byte *x and byte y* of 32-bit chunks in a register into two 16-bit signed halfwords * of 32-bit chunks in a register. * * **Description**:\n * For the `SUNPKD8(x)(*y*)` instruction, it unpacks byte *x and byte y* of 32-bit chunks in Rs1 into * two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit * chunks in Rd. * * **Operations**:\n * ~~~ * Rd.W[m].H[1] = SE16(Rs1.W[m].B[x]) * Rd.W[m].H[0] = SE16(Rs1.W[m].B[y]) * // SUNPKD810, x=1,y=0 * // SUNPKD820, x=2,y=0 * // SUNPKD830, x=3,y=0 * // SUNPKD831, x=3,y=1 * // SUNPKD832, x=3,y=2 * for RV32: m=0, * for RV64: m=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_SUNPKD831(unsigned long a) { unsigned long result; __ASM volatile("sunpkd831 %0, %1" : "=r"(result) : "r"(a)); return result; } /* ===== Inline Function End for 3.139.4. SUNPKD831 ===== */ /* ===== Inline Function Start for 3.139.5. 
SUNPKD832 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
 * \brief SUNPKD832 (Signed Unpacking Bytes 3 & 2)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * SUNPKD8xy Rd, Rs1
 * xy = {10, 20, 30, 31, 32}
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte *x and byte y* of 32-bit chunks in a register into two 16-bit signed halfwords
 * of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `SUNPKD8(x)(*y*)` instruction, it unpacks byte *x and byte y* of 32-bit chunks in Rs1 into
 * two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit
 * chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = SE16(Rs1.W[m].B[x])
 * Rd.W[m].H[0] = SE16(Rs1.W[m].B[y])
 * // SUNPKD810, x=1,y=0
 * // SUNPKD820, x=2,y=0
 * // SUNPKD830, x=3,y=0
 * // SUNPKD831, x=3,y=1
 * // SUNPKD832, x=3,y=2
 * for RV32: m=0,
 * for RV64: m=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SUNPKD832(unsigned long a)
{
    unsigned long rd;

    __ASM volatile("sunpkd832 %0, %1"
                   : "=r"(rd)
                   : "r"(a));
    return rd;
}
/* ===== Inline Function End for 3.139.5. SUNPKD832 ===== */

/* ===== Inline Function Start for 3.140. SWAP8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
 * \brief SWAP8 (Swap Byte within Halfword)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * SWAP8 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Swap the bytes within each halfword of a register.
 *
 * **Description**:\n
 * This instruction swaps the bytes within each halfword of Rs1 and writes the result to
 * Rd.
* * **Operations**:\n * ~~~ * Rd.H[x] = CONCAT(Rs1.H[x][7:0],Rs1.H[x][15:8]); * for RV32: x=1...0, * for RV64: x=3...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_SWAP8(unsigned long a) { unsigned long result; __ASM volatile("swap8 %0, %1" : "=r"(result) : "r"(a)); return result; } /* ===== Inline Function End for 3.140. SWAP8 ===== */ /* ===== Inline Function Start for 3.141. SWAP16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC * \brief SWAP16 (Swap Halfword within Word) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * SWAP16 Rd, Rs1 * ~~~ * * **Purpose**:\n * Swap the 16-bit halfwords within each word of a register. * * **Description**:\n * This instruction swaps the 16-bit halfwords within each word of Rs1 and writes the * result to Rd. * * **Operations**:\n * ~~~ * Rd.W[x] = CONCAT(Rs1.W[x][15:0],Rs1.H[x][31:16]); * for RV32: x=0, * for RV64: x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_SWAP16(unsigned long a) { unsigned long result; __ASM volatile("swap16 %0, %1" : "=r"(result) : "r"(a)); return result; } /* ===== Inline Function End for 3.141. SWAP16 ===== */ /* ===== Inline Function Start for 3.142. UCLIP8 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC * \brief UCLIP8 (SIMD 8-bit Unsigned Clip Value) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * UCLIP8 Rt, Ra, imm3u * ~~~ * * **Purpose**:\n * Limit the 8-bit signed elements of a register into an unsigned range simultaneously. * * **Description**:\n * This instruction limits the 8-bit signed elements stored in Rs1 into an unsigned integer * range between 2^imm3u-1 and 0, and writes the limited results to Rd. For example, if imm3u is 3, the 8- * bit input values should be saturated between 7 and 0. If saturation is performed, set OV bit to 1. 
*
 * **Operations**:\n
 * ~~~
 * src = Rs1.B[x];
 * if (src > (2^imm3u)-1) {
 *   src = (2^imm3u)-1;
 *   OV = 1;
 * } else if (src < 0) {
 *   src = 0;
 *   OV = 1;
 * }
 * Rd.B[x] = src;
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \note `b` is handed to the assembler through the immediate constraint `K`, so it
 *       has to be an integer constant expression (the imm3u clip width).
 * \note The expansion is a statement expression that declares locals named `result`
 *       and `__a`; an argument that mentions an identifier with either of those names
 *       binds to these locals rather than to the caller's variable.
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
#define __RV_UCLIP8(a, b) \
    ({ \
        unsigned long result; \
        unsigned long __a = (unsigned long)(a); \
        __ASM volatile("uclip8 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \
        result; \
    })
/* ===== Inline Function End for 3.142. UCLIP8 ===== */

/* ===== Inline Function Start for 3.143. UCLIP16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
 * \brief UCLIP16 (SIMD 16-bit Unsigned Clip Value)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UCLIP16 Rt, Ra, imm4u
 * ~~~
 *
 * **Purpose**:\n
 * Limit the 16-bit signed elements of a register into an unsigned range simultaneously.
 *
 * **Description**:\n
 * This instruction limits the 16-bit signed elements stored in Rs1 into an unsigned
 * integer range between 2^imm4u-1 and 0, and writes the limited results to Rd. For example, if imm4u is
 * 3, the 16-bit input values should be saturated between 7 and 0. If saturation is performed, set OV bit
 * to 1.
 *
 * **Operations**:\n
 * ~~~
 * src = Rs1.H[x];
 * if (src > (2^imm4u)-1) {
 *   src = (2^imm4u)-1;
 *   OV = 1;
 * } else if (src < 0) {
 *   src = 0;
 *   OV = 1;
 * }
 * Rd.H[x] = src;
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \note `b` is handed to the assembler through the immediate constraint `K`, so it
 *       has to be an integer constant expression (the imm4u clip width).
 * \note The expansion is a statement expression that declares locals named `result`
 *       and `__a`; an argument that mentions an identifier with either of those names
 *       binds to these locals rather than to the caller's variable.
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
#define __RV_UCLIP16(a, b) \
    ({ \
        unsigned long result; \
        unsigned long __a = (unsigned long)(a); \
        __ASM volatile("uclip16 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \
        result; \
    })
/* ===== Inline Function End for 3.143. UCLIP16 ===== */

/* ===== Inline Function Start for 3.144.
UCLIP32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
 * \brief UCLIP32 (SIMD 32-bit Unsigned Clip Value)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UCLIP32 Rd, Rs1, imm5u[4:0]
 * ~~~
 *
 * **Purpose**:\n
 * Limit the 32-bit signed integer elements of a register into an unsigned range
 * simultaneously.
 *
 * **Description**:\n
 * This instruction limits the 32-bit signed integer elements stored in Rs1 into an
 * unsigned integer range between 2^imm5u-1 and 0, and writes the limited results to Rd. For example, if
 * imm5u is 3, the 32-bit input values should be saturated between 7 and 0. If saturation is performed,
 * set OV bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * src = Rs1.W[x];
 * if (src > (2^imm5u)-1) {
 *   src = (2^imm5u)-1;
 *   OV = 1;
 * } else if (src < 0) {
 *   src = 0;
 *   OV = 1;
 * }
 * Rd.W[x] = src;
 * for RV32: x=0,
 * for RV64: x=1...0
 * ~~~
 *
 * \note `b` is handed to the assembler through the immediate constraint `K`, so it
 *       has to be an integer constant expression (the imm5u clip width).
 * \note The expansion is a statement expression that declares locals named `result`
 *       and `__a`; an argument that mentions an identifier with either of those names
 *       binds to these locals rather than to the caller's variable.
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
#define __RV_UCLIP32(a, b) \
    ({ \
        unsigned long result; \
        unsigned long __a = (unsigned long)(a); \
        __ASM volatile("uclip32 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \
        result; \
    })
/* ===== Inline Function End for 3.144. UCLIP32 ===== */

/* ===== Inline Function Start for 3.145. UCMPLE8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP
 * \brief UCMPLE8 (SIMD 8-bit Unsigned Compare Less Than & Equal)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UCMPLE8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit unsigned integer elements less than & equal comparisons simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 8-bit unsigned integer elements in Rs1 with the 8-bit
 * unsigned integer elements in Rs2 to see if the one in Rs1 is less than or equal to the one in Rs2. If it
 * is true, the result is 0xFF; otherwise, the result is 0x0. The four comparison results are written to
 * Rd.
* * **Operations**:\n * ~~~ * Rd.B[x] = (Rs1.B[x] <=u Rs2.B[x])? 0xff : 0x0; * for RV32: x=3...0, * for RV64: x=7...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_UCMPLE8(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("ucmple8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.145. UCMPLE8 ===== */ /* ===== Inline Function Start for 3.146. UCMPLE16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP * \brief UCMPLE16 (SIMD 16-bit Unsigned Compare Less Than & Equal) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * UCMPLE16 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 16-bit unsigned integer elements less than & equal comparisons simultaneously. * * **Description**:\n * This instruction compares the 16-bit unsigned integer elements in Rs1 with the 16-bit * unsigned integer elements in Rs2 to see if the one in Rs1 is less than or equal to the one in Rs2. If it * is true, the result is 0xFFFF; otherwise, the result is 0x0. The element comparison results are * written to Rd. * * **Operations**:\n * ~~~ * Rd.H[x] = (Rs1.H[x] <=u Rs2.H[x])? 0xffff : 0x0; * for RV32: x=1...0, * for RV64: x=3...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_UCMPLE16(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("ucmple16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.146. UCMPLE16 ===== */ /* ===== Inline Function Start for 3.147. 
UCMPLT8 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP * \brief UCMPLT8 (SIMD 8-bit Unsigned Compare Less Than) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * UCMPLT8 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 8-bit unsigned integer elements less than comparisons simultaneously. * * **Description**:\n * This instruction compares the 8-bit unsigned integer elements in Rs1 with the 8-bit * unsigned integer elements in Rs2 to see if the one in Rs1 is less than the one in Rs2. If it is true, the * result is 0xFF; otherwise, the result is 0x0. The element comparison results are written to Rd. * * **Operations**:\n * ~~~ * Rd.B[x] = (Rs1.B[x] (2^8)-1) { * res[x] = (2^8)-1; * OV = 1; * } * Rd.B[x] = res[x]; * for RV32: x=3...0, * for RV64: x=7...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_UKADD8(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("ukadd8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.149. UKADD8 ===== */ /* ===== Inline Function Start for 3.150. UKADD16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB * \brief UKADD16 (SIMD 16-bit Unsigned Saturating Addition) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * UKADD16 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 16-bit unsigned integer element saturating additions simultaneously. * * **Description**:\n * This instruction adds the 16-bit unsigned integer elements in Rs1 with the 16-bit * unsigned integer elements in Rs2. If any of the results are beyond the 16-bit unsigned number * range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set to 1. The saturated * results are written to Rd. 
* * **Operations**:\n * ~~~ * res[x] = Rs1.H[x] + Rs2.H[x]; * if (res[x] > (2^16)-1) { * res[x] = (2^16)-1; * OV = 1; * } * Rd.H[x] = res[x]; * for RV32: x=1...0, * for RV64: x=3...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_UKADD16(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("ukadd16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.150. UKADD16 ===== */ /* ===== Inline Function Start for 3.151. UKADD64 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB * \brief UKADD64 (64-bit Unsigned Saturating Addition) * \details * **Type**: DSP (64-bit Profile) * * **Syntax**:\n * ~~~ * UKADD64 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Add two 64-bit unsigned integers. The result is saturated to the U64 range. * * **RV32 Description**:\n * This instruction adds the 64-bit unsigned integer of an even/odd pair of registers * specified by Rs1(4,1) with the 64-bit unsigned integer of an even/odd pair of registers specified by * Rs2(4,1). If the 64-bit result is beyond the U64 number range (0 <= U64 <= 2^64-1), it is saturated to the * range and the OV bit is set to 1. The saturated result is written to an even/odd pair of registers * specified by Rd(4,1). * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair * includes register 2d and 2d+1. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register * of the pair contains the low 32-bit of the result. * * **RV64 Description**:\n * This instruction adds the 64-bit unsigned integer in Rs1 with the 64-bit unsigned * integer in Rs2. If the 64-bit result is beyond the U64 number range (0 <= U64 <= 2^64-1), it is saturated to * the range and the OV bit is set to 1. 
The saturated result is written to Rd. * * **Operations**:\n * ~~~ * * RV32: * t_L = CONCAT(Rt(4,1),1'b0); t_H = CONCAT(Rt(4,1),1'b1); * a_L = CONCAT(Ra(4,1),1'b0); a_H = CONCAT(Ra(4,1),1'b1); * b_L = CONCAT(Rb(4,1),1'b0); b_H = CONCAT(Rb(4,1),1'b1); * result = R[a_H].R[a_L] + R[b_H].R[b_L]; * if (result > (2^64)-1) { * result = (2^64)-1; OV = 1; * } * R[t_H].R[t_L] = result; * * RV64: * result = Rs1 + Rs2; * if (result > (2^64)-1) { * result = (2^64)-1; OV = 1; * } * Rd = result; * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_UKADD64(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("ukadd64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.151. UKADD64 ===== */ /* ===== Inline Function Start for 3.152. UKADDH ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU * \brief UKADDH (Unsigned Addition with U16 Saturation) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * UKADDH Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Add the unsigned lower 32-bit content of two registers with U16 saturation. * * **Description**:\n * The unsigned lower 32-bit content of Rs1 is added with the unsigned lower 32-bit * content of Rs2. And the result is saturated to the 16-bit unsigned integer range of [0, 2^16-1] and then * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag. 
* * **Operations**:\n * ~~~ * tmp = Rs1.W[0] + Rs2.W[0]; * if (tmp > (2^16)-1) { * tmp = (2^16)-1; * OV = 1; * } * Rd = SE(tmp[15:0]); * ~~~ * * \param [in] a unsigned int type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_UKADDH(unsigned int a, unsigned int b) { unsigned long result; __ASM volatile("ukaddh %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.152. UKADDH ===== */ /* ===== Inline Function Start for 3.153. UKADDW ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU * \brief UKADDW (Unsigned Addition with U32 Saturation) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * UKADDW Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Add the unsigned lower 32-bit content of two registers with U32 saturation. * * **Description**:\n * The unsigned lower 32-bit content of Rs1 is added with the unsigned lower 32-bit * content of Rs2. And the result is saturated to the 32-bit unsigned integer range of [0, 2^32-1] and then * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag. * * **Operations**:\n * ~~~ * tmp = Rs1.W[0] + Rs2.W[0]; * if (tmp > (2^32)-1) { * tmp[31:0] = (2^32)-1; * OV = 1; * } * Rd = tmp[31:0]; // RV32 * Rd = SE(tmp[31:0]); // RV64 * ~~~ * * \param [in] a unsigned int type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_UKADDW(unsigned int a, unsigned int b) { unsigned long result; __ASM volatile("ukaddw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.153. UKADDW ===== */ /* ===== Inline Function Start for 3.154. 
UKCRAS16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief UKCRAS16 (SIMD 16-bit Unsigned Saturating Cross Addition & Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UKCRAS16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do one 16-bit unsigned integer element saturating addition and one 16-bit unsigned
 * integer element saturating subtraction in a 32-bit chunk simultaneously. Operands are from crossed
 * positions in 32-bit chunks.
 *
 * **Description**:\n
 * This instruction adds the 16-bit unsigned integer element in [31:16] of 32-bit chunks in
 * Rs1 with the 16-bit unsigned integer element in [15:0] of 32-bit chunks in Rs2; at the same time, it
 * subtracts the 16-bit unsigned integer element in [31:16] of 32-bit chunks in Rs2 from the 16-bit
 * unsigned integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the 16-bit
 * unsigned number range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set to 1.
 * The saturated results are written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit
 * chunks in Rd for subtraction.
 *
 * **Operations**:\n
 * ~~~
 * res1 = Rs1.W[x][31:16] + Rs2.W[x][15:0];
 * res2 = Rs1.W[x][15:0] - Rs2.W[x][31:16];
 * if (res1 > (2^16)-1) {
 *   res1 = (2^16)-1;
 *   OV = 1;
 * }
 * if (res2 < 0) {
 *   res2 = 0;
 *   OV = 1;
 * }
 * Rd.W[x][31:16] = res1;
 * Rd.W[x][15:0] = res2;
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UKCRAS16(unsigned long a, unsigned long b)
{
    unsigned long rd;

    __ASM volatile("ukcras16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.154. UKCRAS16 ===== */

/* ===== Inline Function Start for 3.155.
UKCRSA16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief UKCRSA16 (SIMD 16-bit Unsigned Saturating Cross Subtraction & Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UKCRSA16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do one 16-bit unsigned integer element saturating subtraction and one 16-bit unsigned
 * integer element saturating addition in a 32-bit chunk simultaneously. Operands are from
 * crossed positions in 32-bit chunks.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit unsigned integer element in [15:0] of 32-bit
 * chunks in Rs2 from the 16-bit unsigned integer element in [31:16] of 32-bit chunks in
 * Rs1; at the same time, it adds the 16-bit unsigned integer element in [31:16] of 32-bit
 * chunks in Rs2 with the 16-bit unsigned integer element in [15:0] of 32-bit chunks in Rs1.
 * If any of the results are beyond the 16-bit unsigned number range (0 <= RES <= 2^16-1),
 * they are saturated to the range and the OV bit is set to 1. The saturated results are
 * written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of 32-bit chunks in
 * Rd for addition.
 *
 * **Operations**:\n
 * ~~~
 * res1 = Rs1.W[x][31:16] - Rs2.W[x][15:0];
 * res2 = Rs1.W[x][15:0] + Rs2.W[x][31:16];
 * if (res1 < 0) {
 *   res1 = 0;
 *   OV = 1;
 * }
 * if (res2 > (2^16)-1) {
 *   res2 = (2^16)-1;
 *   OV = 1;
 * }
 * Rd.W[x][31:16] = res1;
 * Rd.W[x][15:0] = res2;
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in] a unsigned long type of value stored in a
 * \param [in] b unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UKCRSA16(unsigned long a, unsigned long b)
{
    unsigned long rd;

    __ASM volatile("ukcrsa16 %[rd], %[rs1], %[rs2]"
                   : [rd] "=r"(rd)
                   : [rs1] "r"(a), [rs2] "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.155. UKCRSA16 ===== */

/* ===== Inline Function Start for 3.156.
UKMAR64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
 * \brief UKMAR64 (Unsigned Multiply and Saturating Add to 64-Bit Data)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * UKMAR64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the 32-bit unsigned elements in two registers and add the 64-bit multiplication
 * results to the 64-bit unsigned data of a pair of registers (RV32) or a register (RV64).
 * The result is saturated to the U64 range and written back to the pair of registers
 * (RV32) or the register (RV64).
 *
 * **RV32 Description**:\n
 * This instruction multiplies the 32-bit unsigned data of Rs1 with that of Rs2. It
 * adds the 64-bit multiplication result to the 64-bit unsigned data of an even/odd pair of
 * registers specified by Rd(4,1) with unlimited precision. If the 64-bit addition result is
 * beyond the U64 number range (0 <= U64 <= 2^64-1), it is saturated to the range and the OV
 * bit is set to 1. The saturated result is written back to the even/odd pair of registers
 * specified by Rd(4,1).
 * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the
 * register pair includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even
 * `2d` register of the pair contains the low 32-bit of the result.
 *
 * **RV64 Description**:\n
 * This instruction multiplies the 32-bit unsigned elements of Rs1 with that of Rs2.
 * It adds the 64-bit multiplication results to the 64-bit unsigned data in Rd with
 * unlimited precision. If the 64-bit addition result is beyond the U64 number range
 * (0 <= U64 <= 2^64-1), it is saturated to the range and the OV bit is set to 1. The
 * saturated result is written back to Rd.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * result = R[t_H].R[t_L] + (Rs1 u* Rs2);
 * if (result > (2^64)-1) {
 *   result = (2^64)-1; OV = 1;
 * }
 * R[t_H].R[t_L] = result;
 * * RV64:
 * // `result` has unlimited precision
 * result = Rd + (Rs1.W[0] u* Rs2.W[0]) + (Rs1.W[1] u* Rs2.W[1]);
 * if (result > (2^64)-1) {
 *   result = (2^64)-1; OV = 1;
 * }
 * Rd = result;
 * ~~~
 *
 * \param [in] t unsigned long long type of value stored in t
 * \param [in] a unsigned long type of value stored in a
 * \param [in] b unsigned long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_UKMAR64(unsigned long long t, unsigned long a, unsigned long b)
{
    /* `t` is bound read-write ("+r"): it is both the accumulator input and the result. */
    __ASM volatile("ukmar64 %[acc], %[rs1], %[rs2]"
                   : [acc] "+r"(t)
                   : [rs1] "r"(a), [rs2] "r"(b));
    return t;
}
/* ===== Inline Function End for 3.156. UKMAR64 ===== */

/* ===== Inline Function Start for 3.157. UKMSR64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
 * \brief UKMSR64 (Unsigned Multiply and Saturating Subtract from 64-Bit Data)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * UKMSR64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the 32-bit unsigned elements in two registers and subtract the 64-bit
 * multiplication results from the 64-bit unsigned data of a pair of registers (RV32) or a
 * register (RV64). The result is saturated to the U64 range and written back to the pair of
 * registers (RV32) or a register (RV64).
 *
 * **RV32 Description**:\n
 * This instruction multiplies the 32-bit unsigned data of Rs1 with that of Rs2. It
 * subtracts the 64-bit multiplication result from the 64-bit unsigned data of an even/odd
 * pair of registers specified by Rd(4,1) with unlimited precision. If the 64-bit
 * subtraction result is beyond the U64 number range (0 <= U64 <= 2^64-1), it is saturated
 * to the range and the OV bit is set to 1.
The * saturated result is written back to the even/odd pair of registers specified by Rd(4,1). * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair * includes register 2d and 2d+1. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register * of the pair contains the low 32-bit of the result. * * **RV64 Description**:\n * This instruction multiplies the 32-bit unsigned elements of Rs1 with that of Rs2. * It subtracts the 64-bit multiplication results from the 64-bit unsigned data of Rd with unlimited * precision. If the 64-bit subtraction result is beyond the U64 number range (0 <= U64 <= 2^64-1), it is * saturated to the range and the OV bit is set to 1. The saturated result is written back to Rd. * * **Operations**:\n * ~~~ * * RV32: * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1); * result = R[t_H].R[t_L] - (Rs1 u* Rs2); * if (result < 0) { * result = 0; OV = 1; * } * R[t_H].R[t_L] = result; * * RV64: * // `result` has unlimited precision * result = Rd - (Rs1.W[0] u* Rs2.W[0]) - (Rs1.W[1] u* Rs2.W[1]); * if (result < 0) { * result = 0; OV = 1; * } * Rd = result; * ~~~ * * \param [in] t unsigned long long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_UKMSR64(unsigned long long t, unsigned long a, unsigned long b) { __ASM volatile("ukmsr64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 3.157. UKMSR64 ===== */ /* ===== Inline Function Start for 3.158. 
UKSTAS16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief UKSTAS16 (SIMD 16-bit Unsigned Saturating Straight Addition & Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UKSTAS16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do one 16-bit unsigned integer element saturating addition and one 16-bit unsigned
 * integer element saturating subtraction in a 32-bit chunk simultaneously. Operands are
 * from corresponding positions in 32-bit chunks.
 *
 * **Description**:\n
 * This instruction adds the 16-bit unsigned integer element in [31:16] of 32-bit chunks in
 * Rs1 with the 16-bit unsigned integer element in [31:16] of 32-bit chunks in Rs2; at the
 * same time, it subtracts the 16-bit unsigned integer element in [15:0] of 32-bit chunks in
 * Rs2 from the 16-bit unsigned integer element in [15:0] of 32-bit chunks in Rs1. If any of
 * the results are beyond the 16-bit unsigned number range (0 <= RES <= 2^16-1), they are
 * saturated to the range and the OV bit is set to 1. The saturated results are written to
 * [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit chunks in Rd for
 * subtraction.
 *
 * **Operations**:\n
 * ~~~
 * res1 = Rs1.W[x][31:16] + Rs2.W[x][31:16];
 * res2 = Rs1.W[x][15:0] - Rs2.W[x][15:0];
 * if (res1 > (2^16)-1) {
 *   res1 = (2^16)-1;
 *   OV = 1;
 * }
 * if (res2 < 0) {
 *   res2 = 0;
 *   OV = 1;
 * }
 * Rd.W[x][31:16] = res1;
 * Rd.W[x][15:0] = res2;
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in] a unsigned long type of value stored in a
 * \param [in] b unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UKSTAS16(unsigned long a, unsigned long b)
{
    unsigned long rd;

    __ASM volatile("ukstas16 %[rd], %[rs1], %[rs2]"
                   : [rd] "=r"(rd)
                   : [rs1] "r"(a), [rs2] "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.158. UKSTAS16 ===== */

/* ===== Inline Function Start for 3.159.
UKSTSA16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief UKSTSA16 (SIMD 16-bit Unsigned Saturating Straight Subtraction & Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UKSTSA16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do one 16-bit unsigned integer element saturating subtraction and one 16-bit unsigned
 * integer element saturating addition in a 32-bit chunk simultaneously. Operands are from
 * corresponding positions in 32-bit chunks.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit unsigned integer element in [31:16] of 32-bit
 * chunks in Rs2 from the 16-bit unsigned integer element in [31:16] of 32-bit chunks in
 * Rs1; at the same time, it adds the 16-bit unsigned integer element in [15:0] of 32-bit
 * chunks in Rs2 with the 16-bit unsigned integer element in [15:0] of 32-bit chunks in Rs1.
 * If any of the results are beyond the 16-bit unsigned number range (0 <= RES <= 2^16-1),
 * they are saturated to the range and the OV bit is set to 1. The saturated results are
 * written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of 32-bit chunks in
 * Rd for addition.
 *
 * **Operations**:\n
 * ~~~
 * res1 = Rs1.W[x][31:16] - Rs2.W[x][31:16];
 * res2 = Rs1.W[x][15:0] + Rs2.W[x][15:0];
 * if (res1 < 0) {
 *   res1 = 0;
 *   OV = 1;
 * }
 * if (res2 > (2^16)-1) {
 *   res2 = (2^16)-1;
 *   OV = 1;
 * }
 * Rd.W[x][31:16] = res1;
 * Rd.W[x][15:0] = res2;
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in] a unsigned long type of value stored in a
 * \param [in] b unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UKSTSA16(unsigned long a, unsigned long b)
{
    unsigned long rd;

    __ASM volatile("ukstsa16 %[rd], %[rs1], %[rs2]"
                   : [rd] "=r"(rd)
                   : [rs1] "r"(a), [rs2] "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.159. UKSTSA16 ===== */

/* ===== Inline Function Start for 3.160.
UKSUB8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
 * \brief UKSUB8 (SIMD 8-bit Unsigned Saturating Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UKSUB8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit unsigned integer elements saturating subtractions simultaneously.
 *
 * **Description**:\n
 * This instruction subtracts the 8-bit unsigned integer elements in Rs2 from the 8-bit
 * unsigned integer elements in Rs1. If any of the results are beyond the 8-bit unsigned
 * number range (0 <= RES <= 2^8-1), they are saturated to the range and the OV bit is set
 * to 1. The saturated results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rs1.B[x] - Rs2.B[x];
 * if (res[x] < 0) {
 *   res[x] = 0;
 *   OV = 1;
 * }
 * Rd.B[x] = res[x];
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in] a unsigned long type of value stored in a
 * \param [in] b unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UKSUB8(unsigned long a, unsigned long b)
{
    unsigned long rd;

    __ASM volatile("uksub8 %[rd], %[rs1], %[rs2]"
                   : [rd] "=r"(rd)
                   : [rs1] "r"(a), [rs2] "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.160. UKSUB8 ===== */

/* ===== Inline Function Start for 3.161. UKSUB16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief UKSUB16 (SIMD 16-bit Unsigned Saturating Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UKSUB16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit unsigned integer elements saturating subtractions simultaneously.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit unsigned integer elements in Rs2 from the 16-bit
 * unsigned integer elements in Rs1. If any of the results are beyond the 16-bit unsigned
 * number range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set
 * to 1. The saturated results are written to Rd.
* * **Operations**:\n * ~~~ * res[x] = Rs1.H[x] - Rs2.H[x]; * if (res[x] < 0) { * res[x] = 0; * OV = 1; * } * Rd.H[x] = res[x]; * for RV32: x=1...0, * for RV64: x=3...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_UKSUB16(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("uksub16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.161. UKSUB16 ===== */ /* ===== Inline Function Start for 3.162. UKSUB64 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB * \brief UKSUB64 (64-bit Unsigned Saturating Subtraction) * \details * **Type**: DSP (64-bit Profile) * * **Syntax**:\n * ~~~ * UKSUB64 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Perform a 64-bit signed integer subtraction. The result is saturated to the U64 range. * * **RV32 Description**:\n * This instruction subtracts the 64-bit unsigned integer of an even/odd pair of * registers specified by Rs2(4,1) from the 64-bit unsigned integer of an even/odd pair of registers * specified by Rs1(4,1). If the 64-bit result is beyond the U64 number range (0 <= U64 <= 2^64-1), it is * saturated to the range and the OV bit is set to 1. The saturated result is then written to an even/odd * pair of registers specified by Rd(4,1). * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair * includes register 2d and 2d+1. * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d` * register of the pair contains the low 32-bit of the operand. * * **RV64 Description**:\n * This instruction subtracts the 64-bit unsigned integer of Rs2 from the 64-bit * unsigned integer of an even/odd pair of Rs1. 
If the 64-bit result is beyond the U64 number range (0 <= * U64 <= 2^64-1), it is saturated to the range and the OV bit is set to 1. The saturated result is then written * to Rd. * * **Operations**:\n * ~~~ * * RV32: * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1); * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1); * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1); * result = R[a_H].R[a_L] - R[b_H].R[b_L]; * if (result < 0) { * result = 0; OV = 1; * } * R[t_H].R[t_L] = result; * * RV64 * result = Rs1 - Rs2; * if (result < 0) { * result = 0; OV = 1; * } * Rd = result; * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_UKSUB64(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("uksub64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.162. UKSUB64 ===== */ /* ===== Inline Function Start for 3.163. UKSUBH ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU * \brief UKSUBH (Unsigned Subtraction with U16 Saturation) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * UKSUBH Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Subtract the unsigned lower 32-bit content of two registers with U16 saturation. * * **Description**:\n * The unsigned lower 32-bit content of Rs2 is subtracted from the unsigned lower 32-bit * content of Rs1. And the result is saturated to the 16-bit unsigned integer range of [0, 2^16-1] and then * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag. 
* * **Operations**:\n * ~~~ * tmp = Rs1.W[0] - Rs2.W[0]; * if (tmp > (2^16)-1) { * tmp = (2^16)-1; * OV = 1; * } * else if (tmp < 0) { * tmp = 0; * OV = 1; * } * Rd = SE(tmp[15:0]); * ~~~ * * \param [in] a unsigned int type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_UKSUBH(unsigned int a, unsigned int b) { unsigned long result; __ASM volatile("uksubh %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.163. UKSUBH ===== */ /* ===== Inline Function Start for 3.164. UKSUBW ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU * \brief UKSUBW (Unsigned Subtraction with U32 Saturation) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * UKSUBW Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Subtract the unsigned lower 32-bit content of two registers with unsigned 32-bit * saturation. * * **Description**:\n * The unsigned lower 32-bit content of Rs2 is subtracted from the unsigned lower 32-bit * content of Rs1. And the result is saturated to the 32-bit unsigned integer range of [0, 2^32-1] and then * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag. * * **Operations**:\n * ~~~ * tmp = Rs1.W[0] - Rs2.W[0]; * if (tmp < 0) { * tmp[31:0] = 0; * OV = 1; * } * Rd = tmp[31:0]; // RV32 * Rd = SE(tmp[31:0]); // RV64 * ~~~ * * \param [in] a unsigned int type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_UKSUBW(unsigned int a, unsigned int b) { unsigned long result; __ASM volatile("uksubw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.164. UKSUBW ===== */ /* ===== Inline Function Start for 3.165. 
UMAR64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
 * \brief UMAR64 (Unsigned Multiply and Add to 64-Bit Data)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * UMAR64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the 32-bit unsigned elements in two registers and add the 64-bit multiplication
 * results to the 64-bit unsigned data of a pair of registers (RV32) or a register (RV64).
 * The result is written back to the pair of registers (RV32) or a register (RV64).
 *
 * **RV32 Description**:\n
 * This instruction multiplies the 32-bit unsigned data of Rs1 with that of Rs2. It
 * adds the 64-bit multiplication result to the 64-bit unsigned data of an even/odd pair of
 * registers specified by Rd(4,1). The addition result is written back to the even/odd pair
 * of registers specified by Rd(4,1).
 * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the
 * register pair includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even
 * `2d` register of the pair contains the low 32-bit of the result.
 *
 * **RV64 Description**:\n
 * This instruction multiplies the 32-bit unsigned elements of Rs1 with that of Rs2.
 * It adds the 64-bit multiplication results to the 64-bit unsigned data of Rd. The addition
 * result is written back to Rd.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * R[t_H].R[t_L] = R[t_H].R[t_L] + (Rs1 u* Rs2);
 * * RV64:
 * Rd = Rd + (Rs1.W[0] u* Rs2.W[0]) + (Rs1.W[1] u* Rs2.W[1]);
 * ~~~
 *
 * \param [in] t unsigned long long type of value stored in t
 * \param [in] a unsigned long type of value stored in a
 * \param [in] b unsigned long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_UMAR64(unsigned long long t, unsigned long a, unsigned long b)
{
    /* `t` is bound read-write ("+r"): it is both the accumulator input and the result. */
    __ASM volatile("umar64 %[acc], %[rs1], %[rs2]"
                   : [acc] "+r"(t)
                   : [rs1] "r"(a), [rs2] "r"(b));
    return t;
}
/* ===== Inline Function End for 3.165. UMAR64 ===== */

/* ===== Inline Function Start for 3.166. UMAQA ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_8B_MULT_32B_ADD
 * \brief UMAQA (Unsigned Multiply Four Bytes with 32-bit Adds)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * UMAQA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do four unsigned 8-bit multiplications from 32-bit chunks of two registers; and then add
 * the four 16-bit results and the content of corresponding 32-bit chunks of a third
 * register together.
 *
 * **Description**:\n
 * This instruction multiplies the four unsigned 8-bit elements of 32-bit chunks of Rs1 with
 * the four unsigned 8-bit elements of 32-bit chunks of Rs2 and then adds the four results
 * together with the unsigned content of the corresponding 32-bit chunks of Rd. The final
 * results are written back to the corresponding 32-bit chunks in Rd.
* * **Operations**:\n * ~~~ * res[x] = Rd.W[x] + (Rs1.W[x].B[3] u* Rs2.W[x].B[3]) + * (Rs1.W[x].B[2] u* Rs2.W[x].B[2]) + (Rs1.W[x].B[1] u* Rs2.W[x].B[1]) + * (Rs1.W[x].B[0] u* Rs2.W[x].B[0]); * Rd.W[x] = res[x]; * for RV32: x=0, * for RV64: x=1...0 * ~~~ * * \param [in] t unsigned long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_UMAQA(unsigned long t, unsigned long a, unsigned long b) { __ASM volatile("umaqa %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 3.166. UMAQA ===== */ /* ===== Inline Function Start for 3.167. UMAX8 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC * \brief UMAX8 (SIMD 8-bit Unsigned Maximum) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * UMAX8 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 8-bit unsigned integer elements finding maximum operations simultaneously. * * **Description**:\n * This instruction compares the 8-bit unsigned integer elements in Rs1 with the four 8- * bit unsigned integer elements in Rs2 and selects the numbers that is greater than the other one. The * two selected results are written to Rd. * * **Operations**:\n * ~~~ * Rd.B[x] = (Rs1.B[x] >u Rs2.B[x])? Rs1.B[x] : Rs2.B[x]; * for RV32: x=3...0, * for RV64: x=7...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_UMAX8(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("umax8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.167. UMAX8 ===== */ /* ===== Inline Function Start for 3.168. 
UMAX16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC * \brief UMAX16 (SIMD 16-bit Unsigned Maximum) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * UMAX16 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 16-bit unsigned integer elements finding maximum operations simultaneously. * * **Description**:\n * This instruction compares the 16-bit unsigned integer elements in Rs1 with the 16-bit * unsigned integer elements in Rs2 and selects the numbers that is greater than the other one. The * selected results are written to Rd. * * **Operations**:\n * ~~~ * Rd.H[x] = (Rs1.H[x] >u Rs2.H[x])? Rs1.H[x] : Rs2.H[x]; * for RV32: x=1...0, * for RV64: x=3...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_UMAX16(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("umax16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.168. UMAX16 ===== */ /* ===== Inline Function Start for 3.169. UMIN8 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC * \brief UMIN8 (SIMD 8-bit Unsigned Minimum) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * UMIN8 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 8-bit unsigned integer elements finding minimum operations simultaneously. * * **Description**:\n * This instruction compares the 8-bit unsigned integer elements in Rs1 with the 8-bit * unsigned integer elements in Rs2 and selects the numbers that is less than the other one. The * selected results are written to Rd. 
* * **Operations**:\n * ~~~ * Rd.B[x] = (Rs1.B[x] > 1; * for RV32: x=3...0, * for RV64: x=7...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_URADD8(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("uradd8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.174. URADD8 ===== */ /* ===== Inline Function Start for 3.175. URADD16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB * \brief URADD16 (SIMD 16-bit Unsigned Halving Addition) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * URADD16 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 16-bit unsigned integer element additions simultaneously. The results are halved to * avoid overflow or saturation. * * **Description**:\n * This instruction adds the 16-bit unsigned integer elements in Rs1 with the 16-bit * unsigned integer elements in Rs2. The results are first logically right-shifted by 1 bit and then * written to Rd. * * **Examples**:\n * ~~~ * * Ra = 0x7FFF, Rb = 0x7FFF Rt = 0x7FFF * * Ra = 0x8000, Rb = 0x8000 Rt = 0x8000 * * Ra = 0x4000, Rb = 0x8000 Rt = 0x6000 * ~~~ * * **Operations**:\n * ~~~ * Rd.H[x] = (Rs1.H[x] + Rs2.H[x]) u>> 1; * for RV32: x=1...0, * for RV64: x=3...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_URADD16(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("uradd16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.175. URADD16 ===== */ /* ===== Inline Function Start for 3.176. 
URADD64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
 * \brief URADD64 (64-bit Unsigned Halving Addition)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * URADD64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Add two 64-bit unsigned integers. The result is halved to avoid overflow or saturation.
 *
 * **RV32 Description**:\n
 * This instruction adds the 64-bit unsigned integer of an even/odd pair of registers
 * specified by Rs1(4,1) with the 64-bit unsigned integer of an even/odd pair of registers
 * specified by Rs2(4,1). The 64-bit addition result is first logically right-shifted by
 * 1 bit and then written to an even/odd pair of registers specified by Rd(4,1).
 * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the
 * register pair includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even
 * `2d` register of the pair contains the low 32-bit of the result.
 *
 * **RV64 Description**:\n
 * This instruction adds the 64-bit unsigned integer in Rs1 with the 64-bit unsigned
 * integer in Rs2. The 64-bit addition result is first logically right-shifted by 1 bit and
 * then written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * t_L = CONCAT(Rt(4,1),1'b0); t_H = CONCAT(Rt(4,1),1'b1);
 * a_L = CONCAT(Ra(4,1),1'b0); a_H = CONCAT(Ra(4,1),1'b1);
 * b_L = CONCAT(Rb(4,1),1'b0); b_H = CONCAT(Rb(4,1),1'b1);
 * R[t_H].R[t_L] = (R[a_H].R[a_L] + R[b_H].R[b_L]) u>> 1;
 * * RV64:
 * Rd = (Rs1 + Rs2) u>> 1;
 * ~~~
 *
 * \param [in] a unsigned long long type of value stored in a
 * \param [in] b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_URADD64(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;

    __ASM volatile("uradd64 %[rd], %[rs1], %[rs2]"
                   : [rd] "=r"(rd)
                   : [rs1] "r"(a), [rs2] "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.176.
URADD64 ===== */

/* ===== Inline Function Start for 3.177. URADDW ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
 * \brief URADDW (32-bit Unsigned Halving Addition)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * URADDW Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Add 32-bit unsigned integers and the results are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the first 32-bit unsigned integer in Rs1 with the first 32-bit
 * unsigned integer in Rs2. The result is first logically right-shifted by 1 bit and then
 * sign-extended and written to Rd.
 *
 * **Examples**:\n
 * ~~~
 * * Ra = 0x7FFFFFFF, Rb = 0x7FFFFFFF Rt = 0x7FFFFFFF
 * * Ra = 0x80000000, Rb = 0x80000000 Rt = 0x80000000
 * * Ra = 0x40000000, Rb = 0x80000000 Rt = 0x60000000
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * Rd[31:0] = (Rs1[31:0] + Rs2[31:0]) u>> 1;
 * * RV64:
 * resw[31:0] = (Rs1[31:0] + Rs2[31:0]) u>> 1;
 * Rd[63:0] = SE(resw[31:0]);
 * ~~~
 *
 * \param [in] a unsigned int type of value stored in a
 * \param [in] b unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_URADDW(unsigned int a, unsigned int b)
{
    unsigned long rd;

    __ASM volatile("uraddw %[rd], %[rs1], %[rs2]"
                   : [rd] "=r"(rd)
                   : [rs1] "r"(a), [rs2] "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.177. URADDW ===== */

/* ===== Inline Function Start for 3.178. URCRAS16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief URCRAS16 (SIMD 16-bit Unsigned Halving Cross Addition & Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * URCRAS16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit unsigned integer element addition and 16-bit unsigned integer element
 * subtraction in a 32-bit chunk simultaneously. Operands are from crossed positions in
 * 32-bit chunks. The results are halved to avoid overflow or saturation.
* * **Description**:\n * This instruction adds the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs1 * with the 16-bit unsigned integer in [15:0] of 32-bit chunks in Rs2, and subtracts the 16-bit unsigned * integer in [31:16] of 32-bit chunks in Rs2 from the 16-bit unsigned integer in [15:0] of 32-bit chunks * in Rs1. The element results are first logically right-shifted by 1 bit and then written to [31:16] of 32- * bit chunks in Rd and [15:0] of 32-bit chunks in Rd. * * **Examples**:\n * ~~~ * Please see `URADD16` and `URSUB16` instructions. * ~~~ * * **Operations**:\n * ~~~ * Rd.W[x][31:16] = (Rs1.W[x][31:16] + Rs2.W[x][15:0]) u>> 1; * Rd.W[x][15:0] = (Rs1.W[x][15:0] - Rs2.W[x][31:16]) u>> 1; * for RV32, x=0 * for RV64, x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_URCRAS16(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("urcras16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.178. URCRAS16 ===== */ /* ===== Inline Function Start for 3.179. URCRSA16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB * \brief URCRSA16 (SIMD 16-bit Unsigned Halving Cross Subtraction & Addition) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * URCRSA16 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 16-bit unsigned integer element subtraction and 16-bit unsigned integer element * addition in a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit chunks. * The results are halved to avoid overflow or saturation. 
* * **Description**:\n * This instruction subtracts the 16-bit unsigned integer in [15:0] of 32-bit chunks in Rs2 * from the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs1, and adds the 16-bit unsigned * integer in [15:0] of 32-bit chunks in Rs1 with the 16-bit unsigned integer in [31:16] of 32-bit chunks * in Rs2. The two results are first logically right-shifted by 1 bit and then written to [31:16] of 32-bit * chunks in Rd and [15:0] of 32-bit chunks in Rd. * * **Examples**:\n * ~~~ * Please see `URADD16` and `URSUB16` instructions. * ~~~ * * **Operations**:\n * ~~~ * Rd.W[x][31:16] = (Rs1.W[x][31:16] - Rs2.W[x][15:0]) u>> 1; * Rd.W[x][15:0] = (Rs1.W[x][15:0] + Rs2.W[x][31:16]) u>> 1; * for RV32, x=0 * for RV64, x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_URCRSA16(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("urcrsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.179. URCRSA16 ===== */ /* ===== Inline Function Start for 3.180. URSTAS16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB * \brief URSTAS16 (SIMD 16-bit Unsigned Halving Straight Addition & Subtraction) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * URSTAS16 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 16-bit unsigned integer element addition and 16-bit unsigned integer element * subtraction in a 32-bit chunk simultaneously. Operands are from corresponding positions in 32-bit * chunks. The results are halved to avoid overflow or saturation. 
* * **Description**:\n * This instruction adds the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs1 * with the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs2, and subtracts the 16-bit unsigned * integer in [15:0] of 32-bit chunks in Rs2 from the 16-bit unsigned integer in [15:0] of 32-bit chunks * in Rs1. The element results are first logically right-shifted by 1 bit and then written to [31:16] of 32- * bit chunks in Rd and [15:0] of 32-bit chunks in Rd. * * **Examples**:\n * ~~~ * Please see `URADD16` and `URSUB16` instructions. * ~~~ * * **Operations**:\n * ~~~ * Rd.W[x][31:16] = (Rs1.W[x][31:16] + Rs2.W[x][31:16]) u>> 1; * Rd.W[x][15:0] = (Rs1.W[x][15:0] - Rs2.W[x][15:0]) u>> 1; * for RV32, x=0 * for RV64, x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_URSTAS16(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("urstas16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.180. URSTAS16 ===== */ /* ===== Inline Function Start for 3.181. URSTSA16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB * \brief URSTSA16 (SIMD 16-bit Unsigned Halving Straight Subtraction & Addition) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * URCRSA16 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 16-bit unsigned integer element subtraction and 16-bit unsigned integer element * addition in a 32-bit chunk simultaneously. Operands are from corresponding positions in 32-bit * chunks. The results are halved to avoid overflow or saturation. 
* * **Description**:\n * This instruction subtracts the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs2 * from the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs1, and adds the 16-bit unsigned * integer in [15:0] of 32-bit chunks in Rs1 with the 16-bit unsigned integer in [15:0] of 32-bit chunks in * Rs2. The two results are first logically right-shifted by 1 bit and then written to [31:16] of 32-bit * chunks in Rd and [15:0] of 32-bit chunks in Rd. * * **Examples**:\n * ~~~ * Please see `URADD16` and `URSUB16` instructions. * ~~~ * * **Operations**:\n * ~~~ * Rd.W[x][31:16] = (Rs1.W[x][31:16] - Rs2.W[x][31:16]) u>> 1; * Rd.W[x][15:0] = (Rs1.W[x][15:0] + Rs2.W[x][15:0]) u>> 1; * for RV32, x=0 * for RV64, x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_URSTSA16(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("urstsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.181. URSTSA16 ===== */ /* ===== Inline Function Start for 3.182. URSUB8 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB * \brief URSUB8 (SIMD 8-bit Unsigned Halving Subtraction) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * URSUB8 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 8-bit unsigned integer element subtractions simultaneously. The results are halved to * avoid overflow or saturation. * * **Description**:\n * This instruction subtracts the 8-bit unsigned integer elements in Rs2 from the 8-bit * unsigned integer elements in Rs1. The results are first logically right-shifted by 1 bit and then * written to Rd. 
* * **Examples**:\n * ~~~ * * Ra = 0x7F, Rb = 0x80 Rt = 0xFF * * Ra = 0x80, Rb = 0x7F Rt = 0x00 * * Ra = 0x80, Rb = 0x40 Rt = 0x20 * ~~~ * * **Operations**:\n * ~~~ * Rd.B[x] = (Rs1.B[x] - Rs2.B[x]) u>> 1; * for RV32: x=3...0, * for RV64: x=7...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_URSUB8(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("ursub8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.182. URSUB8 ===== */ /* ===== Inline Function Start for 3.183. URSUB16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB * \brief URSUB16 (SIMD 16-bit Unsigned Halving Subtraction) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * URSUB16 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 16-bit unsigned integer element subtractions simultaneously. The results are halved to * avoid overflow or saturation. * * **Description**:\n * This instruction subtracts the 16-bit unsigned integer elements in Rs2 from the 16-bit * unsigned integer elements in Rs1. The results are first logically right-shifted by 1 bit and then * written to Rd. * * **Examples**:\n * ~~~ * * Ra = 0x7FFF, Rb = 0x8000 Rt = 0xFFFF * * Ra = 0x8000, Rb = 0x7FFF Rt = 0x0000 * * Ra = 0x8000, Rb = 0x4000 Rt = 0x2000 * ~~~ * * **Operations**:\n * ~~~ * Rd.H[x] = (Rs1.H[x] - Rs2.H[x]) u>> 1; * for RV32: x=1...0, * for RV64: x=3...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_URSUB16(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("ursub16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.183. 
URSUB16 ===== */

/* ===== Inline Function Start for 3.184. URSUB64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
 * \brief       URSUB64 (64-bit Unsigned Halving Subtraction)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * URSUB64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Perform a 64-bit unsigned integer subtraction. The result is halved to avoid overflow or
 * saturation.
 *
 * **RV32 Description**:\n
 * This instruction subtracts the 64-bit unsigned integer of an even/odd pair of
 * registers specified by Rs2(4,1) from the 64-bit unsigned integer of an even/odd pair of registers
 * specified by Rs1(4,1). The subtraction result is first logically right-shifted by 1 bit and then written
 * to an even/odd pair of registers specified by Rd(4,1).
 * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
 * of the pair contains the low 32-bit of the result.
 *
 * **RV64 Description**:\n
 * This instruction subtracts the 64-bit unsigned integer in Rs2 from the 64-bit
 * unsigned integer in Rs1. The subtraction result is first logically right-shifted by 1 bit and then
 * written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 *  t_L = CONCAT(Rt(4,1),1'b0); t_H = CONCAT(Rt(4,1),1'b1);
 *  a_L = CONCAT(Ra(4,1),1'b0); a_H = CONCAT(Ra(4,1),1'b1);
 *  b_L = CONCAT(Rb(4,1),1'b0); b_H = CONCAT(Rb(4,1),1'b1);
 *  R[t_H].R[t_L] = (R[a_H].R[a_L] - R[b_H].R[b_L]) u>> 1;
 * * RV64:
 *  Rd = (Rs1 - Rs2) u>> 1;
 * ~~~
 *
 * \param [in]  a    unsigned long long value passed to the instruction as Rs1
 * \param [in]  b    unsigned long long value passed to the instruction as Rs2
 * \return unsigned long long value read back from Rd
 */
__STATIC_FORCEINLINE unsigned long long __RV_URSUB64(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;

    __ASM volatile("ursub64 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.184. URSUB64 ===== */

/* ===== Inline Function Start for 3.185. URSUBW ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
 * \brief       URSUBW (32-bit Unsigned Halving Subtraction)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * URSUBW Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Subtract 32-bit unsigned integers and the result is halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the first 32-bit unsigned integer in Rs2 from the first 32-bit
 * unsigned integer in Rs1. The result is first logically right-shifted by 1 bit and then
 * sign-extended and written to Rd.
* * **Examples**:\n * ~~~ * * Ra = 0x7FFFFFFF, Rb = 0x80000000 Rt = 0xFFFFFFFF * * Ra = 0x80000000, Rb = 0x7FFFFFFF Rt = 0x00000000 * * Ra = 0x80000000, Rb = 0x40000000 Rt = 0x20000000 * ~~~ * * **Operations**:\n * ~~~ * * RV32: * Rd[31:0] = (Rs1[31:0] - Rs2[31:0]) u>> 1; * * RV64: * resw[31:0] = (Rs1[31:0] - Rs2[31:0]) u>> 1; * Rd[63:0] = SE(resw[31:0]); * ~~~ * * \param [in] a unsigned int type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_URSUBW(unsigned int a, unsigned int b) { unsigned long result; __ASM volatile("ursubw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.185. URSUBW ===== */ /* ===== Inline Function Start for 3.186. WEXTI ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC * \brief WEXTI (Extract Word from 64-bit Immediate) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * WEXTI Rd, Rs1, #LSBloc * ~~~ * * **Purpose**:\n * Extract a 32-bit word from a 64-bit value stored in an even/odd pair of registers (RV32) or * a register (RV64) starting from a specified immediate LSB bit position. * * **RV32 Description**:\n * This instruction extracts a 32-bit word from a 64-bit value of an even/odd pair of registers specified * by Rs1(4,1) starting from a specified immediate LSB bit position, #LSBloc. The extracted word is * written to Rd. * Rs1(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register * pair includes register 2d and 2d+1. * The odd `2d+1` register of the pair contains the high 32-bit of the 64-bit value and the even `2d` * register of the pair contains the low 32-bit of the 64-bit value. * * **RV64 Description**:\n * This instruction extracts a 32-bit word from a 64-bit value in Rs1 starting from a specified * immediate LSB bit position, #LSBloc. The extracted word is sign-extended and written to lower 32- * bit of Rd. 
* * **Operations**:\n * ~~~ * * RV32: * Idx0 = CONCAT(Rs1(4,1),1'b0); Idx1 = CONCAT(Rs2(4,1),1'b1); * src[63:0] = Concat(R[Idx1], R[Idx0]); * Rd = src[31+LSBloc:LSBloc]; * * RV64: * ExtractW = Rs1[31+LSBloc:LSBloc]; * Rd = SE(ExtractW) * ~~~ * * \param [in] a long long type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long type */ #define __RV_WEXTI(a, b) \ ({ \ unsigned long result; \ long long __a = (long long)(a); \ __ASM volatile("wexti %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ result; \ }) /* ===== Inline Function End for 3.186. WEXTI ===== */ /* ===== Inline Function Start for 3.187. WEXT ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC * \brief WEXT (Extract Word from 64-bit) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * WEXT Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Extract a 32-bit word from a 64-bit value stored in an even/odd pair of registers (RV32) or * a register (RV64) starting from a specified LSB bit position in a register. * * **RV32 Description**:\n * This instruction extracts a 32-bit word from a 64-bit value of an even/odd pair of registers specified * by Rs1(4,1) starting from a specified LSB bit position, specified in Rs2[4:0]. The extracted word is * written to Rd. * Rs1(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register * pair includes register 2d and 2d+1. * The odd `2d+1` register of the pair contains the high 32-bit of the 64-bit value and the even `2d` * register of the pair contains the low 32-bit of the 64-bit value. 
* * **Operations**:\n * ~~~ * * RV32: * Idx0 = CONCAT(Rs1(4,1),1'b0); Idx1 = CONCAT(Rs1(4,1),1'b1); * src[63:0] = Concat(R[Idx1], R[Idx0]); * LSBloc = Rs2[4:0]; * Rd = src[31+LSBloc:LSBloc]; * * RV64: * LSBloc = Rs2[4:0]; * ExtractW = Rs1[31+LSBloc:LSBloc]; * Rd = SE(ExtractW) * ~~~ * * \param [in] a long long type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_WEXT(long long a, unsigned int b) { unsigned long result; __ASM volatile("wext %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 3.187. WEXT ===== */ /* ===== Inline Function Start for 3.188.1. ZUNPKD810 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK * \brief ZUNPKD810 (Unsigned Unpacking Bytes 1 & 0) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * ZUNPKD8xy Rd, Rs1 * xy = {10, 20, 30, 31, 32} * ~~~ * * **Purpose**:\n * Unpack byte x and byte y of 32-bit chunks in a register into two 16-bit unsigned * halfwords of 32-bit chunks in a register. * * **Description**:\n * For the `ZUNPKD8(x)(*y*)` instruction, it unpacks byte *x and byte y* of 32-bit chunks in Rs1 into * two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit * chunks in Rd. * * **Operations**:\n * ~~~ * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[x]) * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[y]) * // ZUNPKD810, x=1,y=0 * // ZUNPKD820, x=2,y=0 * // ZUNPKD830, x=3,y=0 * // ZUNPKD831, x=3,y=1 * // ZUNPKD832, x=3,y=2 * for RV32: m=0, * for RV64: m=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_ZUNPKD810(unsigned long a) { unsigned long result; __ASM volatile("zunpkd810 %0, %1" : "=r"(result) : "r"(a)); return result; } /* ===== Inline Function End for 3.188.1. ZUNPKD810 ===== */ /* ===== Inline Function Start for 3.188.2. 
ZUNPKD820 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
 * \brief       ZUNPKD820 (Unsigned Unpacking Bytes 2 & 0)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * ZUNPKD8xy Rd, Rs1
 * xy = {10, 20, 30, 31, 32}
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte x and byte y of 32-bit chunks in a register into two 16-bit unsigned
 * halfwords of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `ZUNPKD8(x)(*y*)` instruction, it unpacks byte *x and byte y* of 32-bit chunks in Rs1 into
 * two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit
 * chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[x])
 * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[y])
 * // ZUNPKD810, x=1,y=0
 * // ZUNPKD820, x=2,y=0
 * // ZUNPKD830, x=3,y=0
 * // ZUNPKD831, x=3,y=1
 * // ZUNPKD832, x=3,y=2
 * for RV32: m=0,
 * for RV64: m=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long value passed to the instruction as Rs1
 * \return unsigned long value read back from Rd
 */
__STATIC_FORCEINLINE unsigned long __RV_ZUNPKD820(unsigned long a)
{
    unsigned long rd;

    __ASM volatile("zunpkd820 %0, %1"
                   : "=r"(rd)
                   : "r"(a));
    return rd;
}
/* ===== Inline Function End for 3.188.2. ZUNPKD820 ===== */

/* ===== Inline Function Start for 3.188.3. ZUNPKD830 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
 * \brief       ZUNPKD830 (Unsigned Unpacking Bytes 3 & 0)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * ZUNPKD8xy Rd, Rs1
 * xy = {10, 20, 30, 31, 32}
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte x and byte y of 32-bit chunks in a register into two 16-bit unsigned
 * halfwords of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `ZUNPKD8(x)(*y*)` instruction, it unpacks byte *x and byte y* of 32-bit chunks in Rs1 into
 * two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit
 * chunks in Rd.
* * **Operations**:\n * ~~~ * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[x]) * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[y]) * // ZUNPKD810, x=1,y=0 * // ZUNPKD820, x=2,y=0 * // ZUNPKD830, x=3,y=0 * // ZUNPKD831, x=3,y=1 * // ZUNPKD832, x=3,y=2 * for RV32: m=0, * for RV64: m=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_ZUNPKD830(unsigned long a) { unsigned long result; __ASM volatile("zunpkd830 %0, %1" : "=r"(result) : "r"(a)); return result; } /* ===== Inline Function End for 3.188.3. ZUNPKD830 ===== */ /* ===== Inline Function Start for 3.188.4. ZUNPKD831 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK * \brief ZUNPKD831 (Unsigned Unpacking Bytes 3 & 1) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * ZUNPKD8xy Rd, Rs1 * xy = {10, 20, 30, 31, 32} * ~~~ * * **Purpose**:\n * Unpack byte x and byte y of 32-bit chunks in a register into two 16-bit unsigned * halfwords of 32-bit chunks in a register. * * **Description**:\n * For the `ZUNPKD8(x)(*y*)` instruction, it unpacks byte *x and byte y* of 32-bit chunks in Rs1 into * two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit * chunks in Rd. * * **Operations**:\n * ~~~ * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[x]) * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[y]) * // ZUNPKD810, x=1,y=0 * // ZUNPKD820, x=2,y=0 * // ZUNPKD830, x=3,y=0 * // ZUNPKD831, x=3,y=1 * // ZUNPKD832, x=3,y=2 * for RV32: m=0, * for RV64: m=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_ZUNPKD831(unsigned long a) { unsigned long result; __ASM volatile("zunpkd831 %0, %1" : "=r"(result) : "r"(a)); return result; } /* ===== Inline Function End for 3.188.4. ZUNPKD831 ===== */ /* ===== Inline Function Start for 3.188.5. 
ZUNPKD832 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK * \brief ZUNPKD832 (Unsigned Unpacking Bytes 3 & 2) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * ZUNPKD8xy Rd, Rs1 * xy = {10, 20, 30, 31, 32} * ~~~ * * **Purpose**:\n * Unpack byte x and byte y of 32-bit chunks in a register into two 16-bit unsigned * halfwords of 32-bit chunks in a register. * * **Description**:\n * For the `ZUNPKD8(x)(*y*)` instruction, it unpacks byte *x and byte y* of 32-bit chunks in Rs1 into * two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit * chunks in Rd. * * **Operations**:\n * ~~~ * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[x]) * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[y]) * // ZUNPKD810, x=1,y=0 * // ZUNPKD820, x=2,y=0 * // ZUNPKD830, x=3,y=0 * // ZUNPKD831, x=3,y=1 * // ZUNPKD832, x=3,y=2 * for RV32: m=0, * for RV64: m=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_ZUNPKD832(unsigned long a) { unsigned long result; __ASM volatile("zunpkd832 %0, %1" : "=r"(result) : "r"(a)); return result; } /* ===== Inline Function End for 3.188.5. ZUNPKD832 ===== */ #if (__RISCV_XLEN == 64) || defined(__ONLY_FOR_DOXYGEN_DOCUMENT_GENERATION__) /* ===== Inline Function Start for 4.1. ADD32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB * \brief ADD32 (SIMD 32-bit Addition) * \details * **Type**: SIMD (RV64 Only) * * **Syntax**:\n * ~~~ * ADD32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 32-bit integer element additions simultaneously. * * **Description**:\n * This instruction adds the 32-bit integer elements in Rs1 with the 32-bit integer * elements in Rs2, and then writes the 32-bit element results to Rd. * * **Note**:\n * This instruction can be used for either signed or unsigned addition. 
* * **Operations**:\n * ~~~ * Rd.W[x] = Rs1.W[x] + Rs2.W[x]; * for RV64: x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_ADD32(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("add32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.1. ADD32 ===== */ /* ===== Inline Function Start for 4.2. CRAS32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB * \brief CRAS32 (SIMD 32-bit Cross Addition & Subtraction) * \details * **Type**: SIMD (RV64 Only) * * **Syntax**:\n * ~~~ * CRAS32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 32-bit integer element addition and 32-bit integer element subtraction in a 64-bit * chunk simultaneously. Operands are from crossed 32-bit elements. * * **Description**:\n * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit * integer element in [31:0] of Rs2, and writes the result to [63:32] of Rd; at the same time, it subtracts * the 32-bit integer element in [63:32] of Rs2 from the 32-bit integer element in [31:0] of Rs1, and * writes the result to [31:0] of Rd. * * **Note**:\n * This instruction can be used for either signed or unsigned operations. * * **Operations**:\n * ~~~ * Rd.W[1] = Rs1.W[1] + Rs2.W[0]; * Rd.W[0] = Rs1.W[0] - Rs2.W[1]; * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_CRAS32(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("cras32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.2. CRAS32 ===== */ /* ===== Inline Function Start for 4.3. 
CRSA32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief       CRSA32 (SIMD 32-bit Cross Subtraction & Addition)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * CRSA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit integer element subtraction and 32-bit integer element addition in a 64-bit
 * chunk simultaneously. Operands are from crossed 32-bit elements.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit integer element in [31:0] of Rs2 from the 32-bit integer element
 * in [63:32] of Rs1, and writes the result to [63:32] of Rd; at the same time, it adds the 32-bit integer
 * element in [31:0] of Rs1 with the 32-bit integer element in [63:32] of Rs2, and writes the result to
 * [31:0] of Rd.
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned operations.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[1] = Rs1.W[1] - Rs2.W[0];
 * Rd.W[0] = Rs1.W[0] + Rs2.W[1];
 * ~~~
 *
 * \param [in]  a    unsigned long value passed to the instruction as Rs1
 * \param [in]  b    unsigned long value passed to the instruction as Rs2
 * \return unsigned long value read back from Rd
 */
__STATIC_FORCEINLINE unsigned long __RV_CRSA32(unsigned long a, unsigned long b)
{
    unsigned long rd;

    __ASM volatile("crsa32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 4.3. CRSA32 ===== */

/* ===== Inline Function Start for 4.4. KABS32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC
 * \brief       KABS32 (Scalar 32-bit Absolute Value with Saturation)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KABS32 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Get the absolute value of signed 32-bit integer elements in a general register.
 *
 * **Description**:\n
 * This instruction calculates the absolute value of signed 32-bit integer elements stored
 * in Rs1. The results are written to Rd.
This instruction with the minimum negative integer input of * 0x80000000 will produce a saturated output of maximum positive integer of 0x7fffffff and the OV * flag will be set to 1. * * **Operations**:\n * ~~~ * if (Rs1.W[x] >= 0) { * res[x] = Rs1.W[x]; * } else { * If (Rs1.W[x] == 0x80000000) { * res[x] = 0x7fffffff; * OV = 1; * } else { * res[x] = -Rs1.W[x]; * } * } * Rd.W[x] = res[x]; * for RV64: x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_KABS32(unsigned long a) { unsigned long result; __ASM volatile("kabs32 %0, %1" : "=r"(result) : "r"(a)); return result; } /* ===== Inline Function End for 4.4. KABS32 ===== */ /* ===== Inline Function Start for 4.5. KADD32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB * \brief KADD32 (SIMD 32-bit Signed Saturating Addition) * \details * **Type**: SIMD (RV64 Only) * * **Syntax**:\n * ~~~ * KADD32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 32-bit signed integer element saturating additions simultaneously. * * **Description**:\n * This instruction adds the 32-bit signed integer elements in Rs1 with the 32-bit signed * integer elements in Rs2. If any of the results are beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), * they are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd. 
* * **Operations**:\n * ~~~ * res[x] = Rs1.W[x] + Rs2.W[x]; * if (res[x] > (2^31)-1) { * res[x] = (2^31)-1; * OV = 1; * } else if (res[x] < -2^31) { * res[x] = -2^31; * OV = 1; * } * Rd.W[x] = res[x]; * for RV64: x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_KADD32(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("kadd32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.5. KADD32 ===== */ /* ===== Inline Function Start for 4.6. KCRAS32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB * \brief KCRAS32 (SIMD 32-bit Signed Saturating Cross Addition & Subtraction) * \details * **Type**: SIM (RV64 Only) * * **Syntax**:\n * ~~~ * KCRAS32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 32-bit signed integer element saturating addition and 32-bit signed integer element * saturating subtraction in a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements. * * **Description**:\n * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit * integer element in [31:0] of Rs2; at the same time, it subtracts the 32-bit integer element in [63:32] of * Rs2 from the 32-bit integer element in [31:0] of Rs1. If any of the results are beyond the Q31 number * range (-2^31 <= Q31 <= 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated * results are written to [63:32] of Rd for addition and [31:0] of Rd for subtraction. 
* * **Operations**:\n * ~~~ * res[1] = Rs1.W[1] + Rs2.W[0]; * res[0] = Rs1.W[0] - Rs2.W[1]; * if (res[x] > (2^31)-1) { * res[x] = (2^31)-1; * OV = 1; * } else if (res < -2^31) { * res[x] = -2^31; * OV = 1; * } * Rd.W[1] = res[1]; * Rd.W[0] = res[0]; * for RV64, x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_KCRAS32(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("kcras32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.6. KCRAS32 ===== */ /* ===== Inline Function Start for 4.7. KCRSA32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB * \brief KCRSA32 (SIMD 32-bit Signed Saturating Cross Subtraction & Addition) * \details * **Type**: SIMD (RV64 Only) * * **Syntax**:\n * ~~~ * KCRSA32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 32-bit signed integer element saturating subtraction and 32-bit signed integer element * saturating addition in a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements. * *Description: * * This instruction subtracts the 32-bit integer element in [31:0] of Rs2 from the 32-bit integer element * in [63:32] of Rs1; at the same time, it adds the 32-bit integer element in [31:0] of Rs1 with the 32-bit * integer element in [63:32] of Rs2. If any of the results are beyond the Q31 number range (-2^31 <= Q31 * <= 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to * [63:32] of Rd for subtraction and [31:0] of Rd for addition. 
* * **Operations**:\n * ~~~ * res[1] = Rs1.W[1] - Rs2.W[0]; * res[0] = Rs1.W[0] + Rs2.W[1]; * if (res[x] > (2^31)-1) { * res[x] = (2^31)-1; * OV = 1; * } else if (res < -2^31) { * res[x] = -2^31; * OV = 1; * } * Rd.W[1] = res[1]; * Rd.W[0] = res[0]; * for RV64, x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_KCRSA32(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("kcrsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.7. KCRSA32 ===== */ /* ===== Inline Function Start for 4.8.1. KDMBB16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT * \brief KDMBB16 (SIMD Signed Saturating Double Multiply B16 x B16) * \details * **Type**: SIMD (RV64 only) * * **Syntax**:\n * ~~~ * KDMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT) * ~~~ * * **Purpose**:\n * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion * of the 32-bit chunks in registers and then double and saturate the Q31 results into the 32-bit chunks * in the destination register. If saturation happens, an overflow flag OV will be set. * * **Description**:\n * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top * or bottom 16-bit Q15 content of the 32-bit portions in Rs2. The Q30 results are then doubled and * saturated into Q31 values. The Q31 values are then written into the 32-bit chunks in Rd. When both * the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated to 0x7FFFFFFF * and the overflow flag OV will be set. 
* * **Operations**:\n * ~~~ * // KDMBB16: (x,y,z)=(0,0,0),(2,2,1) * // KDMBT16: (x,y,z)=(0,1,0),(2,3,1) * // KDMTT16: (x,y,z)=(1,1,0),(3,3,1) * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y]; * If (0x8000 != aop[z] | 0x8000 != bop[z]) { * Mresult[z] = aop[z] * bop[z]; * resQ31[z] = Mresult[z] << 1; * } else { * resQ31[z] = 0x7FFFFFFF; * OV = 1; * } * Rd.W[z] = resQ31[z]; * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_KDMBB16(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("kdmbb16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.8.1. KDMBB16 ===== */ /* ===== Inline Function Start for 4.8.2. KDMBT16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT * \brief KDMBT16 (SIMD Signed Saturating Double Multiply B16 x T16) * \details * **Type**: SIMD (RV64 only) * * **Syntax**:\n * ~~~ * KDMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT) * ~~~ * * **Purpose**:\n * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion * of the 32-bit chunks in registers and then double and saturate the Q31 results into the 32-bit chunks * in the destination register. If saturation happens, an overflow flag OV will be set. * * **Description**:\n * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top * or bottom 16-bit Q15 content of the 32-bit portions in Rs2. The Q30 results are then doubled and * saturated into Q31 values. The Q31 values are then written into the 32-bit chunks in Rd. When both * the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated to 0x7FFFFFFF * and the overflow flag OV will be set. 
* * **Operations**:\n * ~~~ * // KDMBB16: (x,y,z)=(0,0,0),(2,2,1) * // KDMBT16: (x,y,z)=(0,1,0),(2,3,1) * // KDMTT16: (x,y,z)=(1,1,0),(3,3,1) * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y]; * If (0x8000 != aop[z] | 0x8000 != bop[z]) { * Mresult[z] = aop[z] * bop[z]; * resQ31[z] = Mresult[z] << 1; * } else { * resQ31[z] = 0x7FFFFFFF; * OV = 1; * } * Rd.W[z] = resQ31[z]; * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_KDMBT16(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("kdmbt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.8.2. KDMBT16 ===== */ /* ===== Inline Function Start for 4.8.3. KDMTT16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT * \brief KDMTT16 (SIMD Signed Saturating Double Multiply T16 x T16) * \details * **Type**: SIMD (RV64 only) * * **Syntax**:\n * ~~~ * KDMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT) * ~~~ * * **Purpose**:\n * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion * of the 32-bit chunks in registers and then double and saturate the Q31 results into the 32-bit chunks * in the destination register. If saturation happens, an overflow flag OV will be set. * * **Description**:\n * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top * or bottom 16-bit Q15 content of the 32-bit portions in Rs2. The Q30 results are then doubled and * saturated into Q31 values. The Q31 values are then written into the 32-bit chunks in Rd. When both * the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated to 0x7FFFFFFF * and the overflow flag OV will be set. 
* * **Operations**:\n * ~~~ * // KDMBB16: (x,y,z)=(0,0,0),(2,2,1) * // KDMBT16: (x,y,z)=(0,1,0),(2,3,1) * // KDMTT16: (x,y,z)=(1,1,0),(3,3,1) * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y]; * If (0x8000 != aop[z] | 0x8000 != bop[z]) { * Mresult[z] = aop[z] * bop[z]; * resQ31[z] = Mresult[z] << 1; * } else { * resQ31[z] = 0x7FFFFFFF; * OV = 1; * } * Rd.W[z] = resQ31[z]; * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_KDMTT16(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("kdmtt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.8.3. KDMTT16 ===== */ /* ===== Inline Function Start for 4.9.1. KDMABB16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT * \brief KDMABB16 (SIMD Signed Saturating Double Multiply Addition B16 x B16) * \details * **Type**: SIMD (RV64 only) * * **Syntax**:\n * ~~~ * KDMAxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT) * ~~~ * * **Purpose**:\n * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion * of the 32-bit chunks in registers and then double and saturate the Q31 results, add the results with * the values of the corresponding 32-bit chunks from the destination register and write the saturated * addition results back into the corresponding 32-bit chunks of the destination register. If saturation * happens, an overflow flag OV will be set. * * **Description**:\n * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top * or bottom 16-bit Q15 content of the corresponding 32-bit portions in Rs2. The Q30 results are then * doubled and saturated into Q31 values. The Q31 values are then added with the content of the * corresponding 32-bit portions of Rd. 
If the addition results are beyond the Q31 number range (-2^31 <= * Q31 <= 2^31-1), they are saturated to the range and the OV flag is set to 1. The results after saturation * are written back to Rd. * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be * set. * * **Operations**:\n * ~~~ * // KDMABB16: (x,y,z)=(0,0,0),(2,2,1) * // KDMABT16: (x,y,z)=(0,1,0),(2,3,1) * // KDMATT16: (x,y,z)=(1,1,0),(3,3,1) * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y]; * If (0x8000 != aop[z] | 0x8000 != bop[z]) { * Mresult[z] = aop[z] * bop[z]; * resQ31[z] = Mresult[z] << 1; * } else { * resQ31[z] = 0x7FFFFFFF; * OV = 1; * } * resadd[z] = Rd.W[z] + resQ31[z]; * if (resadd[z] > (2^31)-1) { * resadd[z] = (2^31)-1; * OV = 1; * } else if (resadd[z] < -2^31) { * resadd[z] = -2^31; * OV = 1; * } * Rd.W[z] = resadd[z]; * ~~~ * * \param [in] t unsigned long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_KDMABB16(unsigned long t, unsigned long a, unsigned long b) { __ASM volatile("kdmabb16 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 4.9.1. KDMABB16 ===== */ /* ===== Inline Function Start for 4.9.2. 
KDMABT16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT * \brief KDMABT16 (SIMD Signed Saturating Double Multiply Addition B16 x T16) * \details * **Type**: SIMD (RV64 only) * * **Syntax**:\n * ~~~ * KDMAxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT) * ~~~ * * **Purpose**:\n * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion * of the 32-bit chunks in registers and then double and saturate the Q31 results, add the results with * the values of the corresponding 32-bit chunks from the destination register and write the saturated * addition results back into the corresponding 32-bit chunks of the destination register. If saturation * happens, an overflow flag OV will be set. * * **Description**:\n * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top * or bottom 16-bit Q15 content of the corresponding 32-bit portions in Rs2. The Q30 results are then * doubled and saturated into Q31 values. The Q31 values are then added with the content of the * corresponding 32-bit portions of Rd. If the addition results are beyond the Q31 number range (-2^31 <= * Q31 <= 2^31-1), they are saturated to the range and the OV flag is set to 1. The results after saturation * are written back to Rd. * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be * set. 
* * **Operations**:\n * ~~~ * // KDMABB16: (x,y,z)=(0,0,0),(2,2,1) * // KDMABT16: (x,y,z)=(0,1,0),(2,3,1) * // KDMATT16: (x,y,z)=(1,1,0),(3,3,1) * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y]; * If (0x8000 != aop[z] | 0x8000 != bop[z]) { * Mresult[z] = aop[z] * bop[z]; * resQ31[z] = Mresult[z] << 1; * } else { * resQ31[z] = 0x7FFFFFFF; * OV = 1; * } * resadd[z] = Rd.W[z] + resQ31[z]; * if (resadd[z] > (2^31)-1) { * resadd[z] = (2^31)-1; * OV = 1; * } else if (resadd[z] < -2^31) { * resadd[z] = -2^31; * OV = 1; * } * Rd.W[z] = resadd[z]; * ~~~ * * \param [in] t unsigned long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_KDMABT16(unsigned long t, unsigned long a, unsigned long b) { __ASM volatile("kdmabt16 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 4.9.2. KDMABT16 ===== */ /* ===== Inline Function Start for 4.9.3. KDMATT16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT * \brief KDMATT16 (SIMD Signed Saturating Double Multiply Addition T16 x T16) * \details * **Type**: SIMD (RV64 only) * * **Syntax**:\n * ~~~ * KDMAxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT) * ~~~ * * **Purpose**:\n * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion * of the 32-bit chunks in registers and then double and saturate the Q31 results, add the results with * the values of the corresponding 32-bit chunks from the destination register and write the saturated * addition results back into the corresponding 32-bit chunks of the destination register. If saturation * happens, an overflow flag OV will be set. * * **Description**:\n * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top * or bottom 16-bit Q15 content of the corresponding 32-bit portions in Rs2. 
The Q30 results are then * doubled and saturated into Q31 values. The Q31 values are then added with the content of the * corresponding 32-bit portions of Rd. If the addition results are beyond the Q31 number range (-2^31 <= * Q31 <= 2^31-1), they are saturated to the range and the OV flag is set to 1. The results after saturation * are written back to Rd. * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be * set. * * **Operations**:\n * ~~~ * // KDMABB16: (x,y,z)=(0,0,0),(2,2,1) * // KDMABT16: (x,y,z)=(0,1,0),(2,3,1) * // KDMATT16: (x,y,z)=(1,1,0),(3,3,1) * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y]; * If (0x8000 != aop[z] | 0x8000 != bop[z]) { * Mresult[z] = aop[z] * bop[z]; * resQ31[z] = Mresult[z] << 1; * } else { * resQ31[z] = 0x7FFFFFFF; * OV = 1; * } * resadd[z] = Rd.W[z] + resQ31[z]; * if (resadd[z] > (2^31)-1) { * resadd[z] = (2^31)-1; * OV = 1; * } else if (resadd[z] < -2^31) { * resadd[z] = -2^31; * OV = 1; * } * Rd.W[z] = resadd[z]; * ~~~ * * \param [in] t unsigned long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_KDMATT16(unsigned long t, unsigned long a, unsigned long b) { __ASM volatile("kdmatt16 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 4.9.3. KDMATT16 ===== */ /* ===== Inline Function Start for 4.10.1. 
KHMBB16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
 * \brief KHMBB16 (SIMD Signed Saturating Half Multiply B16 x B16)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KHMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
 * of the 32-bit chunks in registers and then right-shift 15 bits to turn the Q30 results into Q15
 * numbers again and saturate the Q15 results into the destination register. If saturation happens, an
 * overflow flag OV will be set.
 *
 * **Description**:\n
 * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
 * or bottom 16-bit Q15 content of the 32-bit portion in Rs2. The Q30 results are then right-shifted 15-
 * bits and saturated into Q15 values. The 32-bit Q15 values are then written into the 32-bit chunks in
 * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated
 * to 0x7FFF and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * // KHMBB16: (x,y,z)=(0,0,0),(2,2,1)
 * // KHMBT16: (x,y,z)=(0,1,0),(2,3,1)
 * // KHMTT16: (x,y,z)=(1,1,0),(3,3,1)
 * aop = Rs1.H[x]; bop = Rs2.H[y];
 * If (0x8000 != aop | 0x8000 != bop) {
 *   Mresult[31:0] = aop * bop;
 *   res[15:0] = Mresult[30:15];
 * } else {
 *   res[15:0] = 0x7FFF;
 *   OV = 1;
 * }
 * Rd.W[z] = SE32(res[15:0]);
 * ~~~
 *
 * \param [in]  a    unsigned long value supplied to the instruction as Rs1
 * \param [in]  b    unsigned long value supplied to the instruction as Rs2
 * \return unsigned long value the instruction leaves in Rd
 */
__STATIC_FORCEINLINE unsigned long __RV_KHMBB16(unsigned long a, unsigned long b)
{
    unsigned long rd;

    __ASM volatile("khmbb16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 4.10.1. KHMBB16 ===== */

/* ===== Inline Function Start for 4.10.2.
KHMBT16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
 * \brief KHMBT16 (SIMD Signed Saturating Half Multiply B16 x T16)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KHMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
 * of the 32-bit chunks in registers and then right-shift 15 bits to turn the Q30 results into Q15
 * numbers again and saturate the Q15 results into the destination register. If saturation happens, an
 * overflow flag OV will be set.
 *
 * **Description**:\n
 * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
 * or bottom 16-bit Q15 content of the 32-bit portion in Rs2. The Q30 results are then right-shifted 15-
 * bits and saturated into Q15 values. The 32-bit Q15 values are then written into the 32-bit chunks in
 * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated
 * to 0x7FFF and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * // KHMBB16: (x,y,z)=(0,0,0),(2,2,1)
 * // KHMBT16: (x,y,z)=(0,1,0),(2,3,1)
 * // KHMTT16: (x,y,z)=(1,1,0),(3,3,1)
 * aop = Rs1.H[x]; bop = Rs2.H[y];
 * If (0x8000 != aop | 0x8000 != bop) {
 *   Mresult[31:0] = aop * bop;
 *   res[15:0] = Mresult[30:15];
 * } else {
 *   res[15:0] = 0x7FFF;
 *   OV = 1;
 * }
 * Rd.W[z] = SE32(res[15:0]);
 * ~~~
 *
 * \param [in]  a    unsigned long value supplied to the instruction as Rs1
 * \param [in]  b    unsigned long value supplied to the instruction as Rs2
 * \return unsigned long value the instruction leaves in Rd
 */
__STATIC_FORCEINLINE unsigned long __RV_KHMBT16(unsigned long a, unsigned long b)
{
    unsigned long rd;

    __ASM volatile("khmbt16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 4.10.2. KHMBT16 ===== */

/* ===== Inline Function Start for 4.10.3.
KHMTT16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
 * \brief KHMTT16 (SIMD Signed Saturating Half Multiply T16 x T16)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KHMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
 * of the 32-bit chunks in registers and then right-shift 15 bits to turn the Q30 results into Q15
 * numbers again and saturate the Q15 results into the destination register. If saturation happens, an
 * overflow flag OV will be set.
 *
 * **Description**:\n
 * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
 * or bottom 16-bit Q15 content of the 32-bit portion in Rs2. The Q30 results are then right-shifted 15-
 * bits and saturated into Q15 values. The 32-bit Q15 values are then written into the 32-bit chunks in
 * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated
 * to 0x7FFF and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * // KHMBB16: (x,y,z)=(0,0,0),(2,2,1)
 * // KHMBT16: (x,y,z)=(0,1,0),(2,3,1)
 * // KHMTT16: (x,y,z)=(1,1,0),(3,3,1)
 * aop = Rs1.H[x]; bop = Rs2.H[y];
 * If (0x8000 != aop | 0x8000 != bop) {
 *   Mresult[31:0] = aop * bop;
 *   res[15:0] = Mresult[30:15];
 * } else {
 *   res[15:0] = 0x7FFF;
 *   OV = 1;
 * }
 * Rd.W[z] = SE32(res[15:0]);
 * ~~~
 *
 * \param [in]  a    unsigned long value supplied to the instruction as Rs1
 * \param [in]  b    unsigned long value supplied to the instruction as Rs2
 * \return unsigned long value the instruction leaves in Rd
 */
__STATIC_FORCEINLINE unsigned long __RV_KHMTT16(unsigned long a, unsigned long b)
{
    unsigned long rd;

    __ASM volatile("khmtt16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 4.10.3. KHMTT16 ===== */

/* ===== Inline Function Start for 4.11.1.
KMABB32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT_ADD * \brief KMABB32 (Saturating Signed Multiply Bottom Words & Add) * \details * **Type**: DSP (RV64 Only) * * **Syntax**:\n * ~~~ * KMABB32 Rd, Rs1, Rs2 * KMABT32 Rd, Rs1, Rs2 * KMATT32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the signed 32-bit element in a register with the 32-bit element in another register * and add the result to the content of 64-bit data in the third register. The addition result may be * saturated and is written to the third register. * * KMABB32: rd + bottom*bottom * * KMABT32: rd + bottom*top * * KMATT32: rd + top*top * * **Description**:\n * For the `KMABB32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit * element in Rs2. * For the `KMABT32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit * element in Rs2. * For the `KMATT32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit * element in Rs2. * The multiplication result is added to the content of 64-bit data in Rd. If the addition result is beyond * the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The * result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed * integers. 
* * **Operations**:\n * ~~~ * res = Rd + (Rs1.W[0] * Rs2.W[0]); // KMABB32 * res = Rd + (Rs1.W[0] * Rs2.W[1]); // KMABT32 * res = Rd + (Rs1.W[1] * Rs2.W[1]); // KMATT32 * if (res > (2^63)-1) { * res = (2^63)-1; * OV = 1; * } else if (res < -2^63) { * res = -2^63; * OV = 1; * } * Rd = res; * *Exceptions:* None * ~~~ * * \param [in] t long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KMABB32(long t, unsigned long a, unsigned long b) { __ASM volatile("kmabb32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 4.11.1. KMABB32 ===== */ /* ===== Inline Function Start for 4.11.2. KMABT32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT_ADD * \brief KMABT32 (Saturating Signed Multiply Bottom & Top Words & Add) * \details * **Type**: DSP (RV64 Only) * * **Syntax**:\n * ~~~ * KMABB32 Rd, Rs1, Rs2 * KMABT32 Rd, Rs1, Rs2 * KMATT32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the signed 32-bit element in a register with the 32-bit element in another register * and add the result to the content of 64-bit data in the third register. The addition result may be * saturated and is written to the third register. * * KMABB32: rd + bottom*bottom * * KMABT32: rd + bottom*top * * KMATT32: rd + top*top * * **Description**:\n * For the `KMABB32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit * element in Rs2. * For the `KMABT32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit * element in Rs2. * For the `KMATT32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit * element in Rs2. * The multiplication result is added to the content of 64-bit data in Rd. 
If the addition result is beyond * the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The * result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed * integers. * * **Operations**:\n * ~~~ * res = Rd + (Rs1.W[0] * Rs2.W[0]); // KMABB32 * res = Rd + (Rs1.W[0] * Rs2.W[1]); // KMABT32 * res = Rd + (Rs1.W[1] * Rs2.W[1]); // KMATT32 * if (res > (2^63)-1) { * res = (2^63)-1; * OV = 1; * } else if (res < -2^63) { * res = -2^63; * OV = 1; * } * Rd = res; * *Exceptions:* None * ~~~ * * \param [in] t long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KMABT32(long t, unsigned long a, unsigned long b) { __ASM volatile("kmabt32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 4.11.2. KMABT32 ===== */ /* ===== Inline Function Start for 4.11.3. KMATT32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT_ADD * \brief KMATT32 (Saturating Signed Multiply Top Words & Add) * \details * **Type**: DSP (RV64 Only) * * **Syntax**:\n * ~~~ * KMABB32 Rd, Rs1, Rs2 * KMABT32 Rd, Rs1, Rs2 * KMATT32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the signed 32-bit element in a register with the 32-bit element in another register * and add the result to the content of 64-bit data in the third register. The addition result may be * saturated and is written to the third register. * * KMABB32: rd + bottom*bottom * * KMABT32: rd + bottom*top * * KMATT32: rd + top*top * * **Description**:\n * For the `KMABB32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit * element in Rs2. * For the `KMABT32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit * element in Rs2. 
* For the `KMATT32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit * element in Rs2. * The multiplication result is added to the content of 64-bit data in Rd. If the addition result is beyond * the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The * result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed * integers. * * **Operations**:\n * ~~~ * res = Rd + (Rs1.W[0] * Rs2.W[0]); // KMABB32 * res = Rd + (Rs1.W[0] * Rs2.W[1]); // KMABT32 * res = Rd + (Rs1.W[1] * Rs2.W[1]); // KMATT32 * if (res > (2^63)-1) { * res = (2^63)-1; * OV = 1; * } else if (res < -2^63) { * res = -2^63; * OV = 1; * } * Rd = res; * *Exceptions:* None * ~~~ * * \param [in] t long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KMATT32(long t, unsigned long a, unsigned long b) { __ASM volatile("kmatt32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 4.11.3. KMATT32 ===== */ /* ===== Inline Function Start for 4.12.1. KMADA32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC * \brief KMADA32 (Saturating Signed Multiply Two Words and Two Adds) * \details * **Type**: DSP (RV64 Only) * * **Syntax**:\n * ~~~ * KMADA32 Rd, Rs1, Rs2 * KMAXDA32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two signed 32-bit multiplications from 32-bit data in two registers; and then adds the * two 64-bit results and 64-bit data in a third register together. The addition result may be saturated. 
* * KMADA32: rd + top*top + bottom*bottom * * KMAXDA32: rd + top*bottom + bottom*top * * **Description**:\n * For the `KMADA32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32- * bit element in Rs2 and then adds the result to the result of multiplying the top 32-bit element in Rs1 * with the top 32-bit element in Rs2. It is actually an alias of the `KMAR64` instruction. * For the `KMAXDA32` instruction, it multiplies the top 32-bit element in Rs1 with the bottom 32-bit * element in Rs2 and then adds the result to the result of multiplying the bottom 32-bit element in Rs1 * with the top 32-bit element in Rs2. * The result is added to the content of 64-bit data in Rd. If the addition result is beyond the Q63 * number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The 64-bit * result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed integers. * * **Operations**:\n * ~~~ * res = Rd + (Rs1.W[1] * Rs2.w[1]) + (Rs1.W[0] * Rs2.W[0]); // KMADA32 * res = Rd + (Rs1.W[1] * Rs2.W[0]) + (Rs1.W[0] * Rs2.W[1]); // KMAXDA32 * if (res > (2^63)-1) { * res = (2^63)-1; * OV = 1; * } else if (res < -2^63) { * res = -2^63; * OV = 1; * } * Rd = res; * ~~~ * * \param [in] t long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KMADA32(long t, unsigned long a, unsigned long b) { __ASM volatile("kmada32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 4.12.1. KMADA32 ===== */ /* ===== Inline Function Start for 4.12.2. 
KMAXDA32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC * \brief KMAXDA32 (Saturating Signed Crossed Multiply Two Words and Two Adds) * \details * **Type**: DSP (RV64 Only) * * **Syntax**:\n * ~~~ * KMADA32 Rd, Rs1, Rs2 * KMAXDA32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two signed 32-bit multiplications from 32-bit data in two registers; and then adds the * two 64-bit results and 64-bit data in a third register together. The addition result may be saturated. * * KMADA32: rd + top*top + bottom*bottom * * KMAXDA32: rd + top*bottom + bottom*top * * **Description**:\n * For the `KMADA32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32- * bit element in Rs2 and then adds the result to the result of multiplying the top 32-bit element in Rs1 * with the top 32-bit element in Rs2. It is actually an alias of the `KMAR64` instruction. * For the `KMAXDA32` instruction, it multiplies the top 32-bit element in Rs1 with the bottom 32-bit * element in Rs2 and then adds the result to the result of multiplying the bottom 32-bit element in Rs1 * with the top 32-bit element in Rs2. * The result is added to the content of 64-bit data in Rd. If the addition result is beyond the Q63 * number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The 64-bit * result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed integers. 
* * **Operations**:\n * ~~~ * res = Rd + (Rs1.W[1] * Rs2.w[1]) + (Rs1.W[0] * Rs2.W[0]); // KMADA32 * res = Rd + (Rs1.W[1] * Rs2.W[0]) + (Rs1.W[0] * Rs2.W[1]); // KMAXDA32 * if (res > (2^63)-1) { * res = (2^63)-1; * OV = 1; * } else if (res < -2^63) { * res = -2^63; * OV = 1; * } * Rd = res; * ~~~ * * \param [in] t long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KMAXDA32(long t, unsigned long a, unsigned long b) { __ASM volatile("kmaxda32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 4.12.2. KMAXDA32 ===== */ /* ===== Inline Function Start for 4.13.1. KMDA32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC * \brief KMDA32 (Signed Multiply Two Words and Add) * \details * **Type**: DSP (RV64 Only) * * **Syntax**:\n * ~~~ * KMDA32 Rd, Rs1, Rs2 * KMXDA32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two signed 32-bit multiplications from the 32-bit element of two registers; and then * adds the two 64-bit results together. The addition result may be saturated. * * KMDA32: top*top + bottom*bottom * * KMXDA32: top*bottom + bottom*top * * **Description**:\n * For the `KMDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1 * with the top 32-bit element of Rs2. * For the `KMXDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1 * with the bottom 32-bit element of Rs2. * The addition result is checked for saturation. If saturation happens, the result is saturated to 2^63-1. * The final result is written to Rd. The 32-bit contents are treated as signed integers. 
* * **Operations**:\n * ~~~ * if ((Rs1 != 0x8000000080000000) or (Rs2 != 0x8000000080000000)) { * Rd = (Rs1.W[1] * Rs2.W[1]) + (Rs1.W[0] * Rs2.W[0]); // KMDA32 * Rd = (Rs1.W[1] * Rs2.W[0]) + (Rs1.W[0] * Rs2.W[1]); // KMXDA32 * } else { * Rd = 0x7fffffffffffffff; * OV = 1; * } * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KMDA32(unsigned long a, unsigned long b) { long result; __ASM volatile("kmda32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.13.1. KMDA32 ===== */ /* ===== Inline Function Start for 4.13.2. KMXDA32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC * \brief KMXDA32 (Signed Crossed Multiply Two Words and Add) * \details * **Type**: DSP (RV64 Only) * * **Syntax**:\n * ~~~ * KMDA32 Rd, Rs1, Rs2 * KMXDA32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two signed 32-bit multiplications from the 32-bit element of two registers; and then * adds the two 64-bit results together. The addition result may be saturated. * * KMDA32: top*top + bottom*bottom * * KMXDA32: top*bottom + bottom*top * * **Description**:\n * For the `KMDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1 * with the top 32-bit element of Rs2. * For the `KMXDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1 * with the bottom 32-bit element of Rs2. * The addition result is checked for saturation. If saturation happens, the result is saturated to 2^63-1. * The final result is written to Rd. The 32-bit contents are treated as signed integers. 
* * **Operations**:\n * ~~~ * if ((Rs1 != 0x8000000080000000) or (Rs2 != 0x8000000080000000)) { * Rd = (Rs1.W[1] * Rs2.W[1]) + (Rs1.W[0] * Rs2.W[0]); // KMDA32 * Rd = (Rs1.W[1] * Rs2.W[0]) + (Rs1.W[0] * Rs2.W[1]); // KMXDA32 * } else { * Rd = 0x7fffffffffffffff; * OV = 1; * } * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KMXDA32(unsigned long a, unsigned long b) { long result; __ASM volatile("kmxda32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.13.2. KMXDA32 ===== */ /* ===== Inline Function Start for 4.14.1. KMADS32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC * \brief KMADS32 (Saturating Signed Multiply Two Words & Subtract & Add) * \details * **Type**: DSP (RV64 Only) * * **Syntax**:\n * ~~~ * KMADS32 Rd, Rs1, Rs2 * KMADRS32 Rd, Rs1, Rs2 * KMAXDS32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two signed 32-bit multiplications from 32-bit elements in two registers; and then * perform a subtraction operation between the two 64-bit results. Then add the subtraction result to * 64-bit data in a third register. The addition result may be saturated. * * KMADS32: rd + (top*top - bottom*bottom) * * KMADRS32: rd + (bottom*bottom - top*top) * * KMAXDS32: rd + (top*bottom - bottom*top) * * **Description**:\n * For the `KMADS32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in * Rs1 with the top 32-bit element in Rs2. * For the `KMADRS32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit * element in Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit * element in Rs1 with the bottom 32-bit element in Rs2. 
* For the `KMAXDS32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in * Rs1 with the bottom 32-bit element in Rs2. * The subtraction result is then added to the content of 64-bit data in Rd. If the addition result is * beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to * 1. The 64-bit result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated * as signed integers. * * **Operations**:\n * ~~~ * res = Rd + (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // KMADS32 * res = Rd + (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // KMADRS32 * res = Rd + (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // KMAXDS32 * if (res > (2^63)-1) { * res = (2^63)-1; * OV = 1; * } else if (res < -2^63) { * res = -2^63; * OV = 1; * } * Rd = res; * ~~~ * * \param [in] t long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KMADS32(long t, unsigned long a, unsigned long b) { __ASM volatile("kmads32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 4.14.1. KMADS32 ===== */ /* ===== Inline Function Start for 4.14.2. KMADRS32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC * \brief KMADRS32 (Saturating Signed Multiply Two Words & Reverse Subtract & Add) * \details * **Type**: DSP (RV64 Only) * * **Syntax**:\n * ~~~ * KMADS32 Rd, Rs1, Rs2 * KMADRS32 Rd, Rs1, Rs2 * KMAXDS32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two signed 32-bit multiplications from 32-bit elements in two registers; and then * perform a subtraction operation between the two 64-bit results. Then add the subtraction result to * 64-bit data in a third register. 
The addition result may be saturated. * * KMADS32: rd + (top*top - bottom*bottom) * * KMADRS32: rd + (bottom*bottom - top*top) * * KMAXDS32: rd + (top*bottom - bottom*top) * * **Description**:\n * For the `KMADS32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in * Rs1 with the top 32-bit element in Rs2. * For the `KMADRS32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit * element in Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit * element in Rs1 with the bottom 32-bit element in Rs2. * For the `KMAXDS32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in * Rs1 with the bottom 32-bit element in Rs2. * The subtraction result is then added to the content of 64-bit data in Rd. If the addition result is * beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to * 1. The 64-bit result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated * as signed integers. 
* * **Operations**:\n * ~~~ * res = Rd + (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // KMADS32 * res = Rd + (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // KMADRS32 * res = Rd + (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // KMAXDS32 * if (res > (2^63)-1) { * res = (2^63)-1; * OV = 1; * } else if (res < -2^63) { * res = -2^63; * OV = 1; * } * Rd = res; * ~~~ * * \param [in] t long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KMADRS32(long t, unsigned long a, unsigned long b) { __ASM volatile("kmadrs32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 4.14.2. KMADRS32 ===== */ /* ===== Inline Function Start for 4.14.3. KMAXDS32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC * \brief KMAXDS32 (Saturating Signed Crossed Multiply Two Words & Subtract & Add) * \details * **Type**: DSP (RV64 Only) * * **Syntax**:\n * ~~~ * KMADS32 Rd, Rs1, Rs2 * KMADRS32 Rd, Rs1, Rs2 * KMAXDS32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two signed 32-bit multiplications from 32-bit elements in two registers; and then * perform a subtraction operation between the two 64-bit results. Then add the subtraction result to * 64-bit data in a third register. The addition result may be saturated. * * KMADS32: rd + (top*top - bottom*bottom) * * KMADRS32: rd + (bottom*bottom - top*top) * * KMAXDS32: rd + (top*bottom - bottom*top) * * **Description**:\n * For the `KMADS32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in * Rs1 with the top 32-bit element in Rs2. 
* For the `KMADRS32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit * element in Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit * element in Rs1 with the bottom 32-bit element in Rs2. * For the `KMAXDS32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in * Rs1 with the bottom 32-bit element in Rs2. * The subtraction result is then added to the content of 64-bit data in Rd. If the addition result is * beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to * 1. The 64-bit result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated * as signed integers. * * **Operations**:\n * ~~~ * res = Rd + (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // KMADS32 * res = Rd + (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // KMADRS32 * res = Rd + (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // KMAXDS32 * if (res > (2^63)-1) { * res = (2^63)-1; * OV = 1; * } else if (res < -2^63) { * res = -2^63; * OV = 1; * } * Rd = res; * ~~~ * * \param [in] t long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KMAXDS32(long t, unsigned long a, unsigned long b) { __ASM volatile("kmaxds32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 4.14.3. KMAXDS32 ===== */ /* ===== Inline Function Start for 4.15.1. 
KMSDA32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
 * \brief KMSDA32 (Saturating Signed Multiply Two Words & Add & Subtract)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KMSDA32 Rd, Rs1, Rs2
 * KMSXDA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 32-bit multiplications from the 32-bit element of two registers; and then
 * subtracts the two 64-bit results from a third register. The subtraction result may be saturated.
 * * KMSDA32: rd - top*top - bottom*bottom
 * * KMSXDA32: rd - top*bottom - bottom*top
 *
 * **Description**:\n
 * For the `KMSDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
 * element of Rs2 and multiplies the top 32-bit element of Rs1 with the top 32-bit element of Rs2.
 * For the `KMSXDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
 * element of Rs2 and multiplies the top 32-bit element of Rs1 with the bottom 32-bit element of Rs2.
 * The two 64-bit multiplication results are then subtracted from the content of Rd. If the subtraction
 * result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit
 * is set to 1. The result after saturation is written to Rd. The 32-bit contents are treated as signed
 * integers.
 *
 * **Operations**:\n
 * ~~~
 * res = Rd - (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // KMSDA32
 * res = Rd - (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // KMSXDA32
 * if (res > (2^63)-1) {
 *   res = (2^63)-1;
 *   OV = 1;
 * } else if (res < -2^63) {
 *   res = -2^63;
 *   OV = 1;
 * }
 * Rd = res;
 * ~~~
 *
 * \param [in] t long type of value stored in t (initial content of Rd, the accumulator)
 * \param [in] a unsigned long type of value stored in a (Rs1 operand)
 * \param [in] b unsigned long type of value stored in b (Rs2 operand)
 * \return value stored in long type (content of Rd after the instruction)
 */
__STATIC_FORCEINLINE long __RV_KMSDA32(long t, unsigned long a, unsigned long b)
{
    long acc = t; /* operand %0 is read-modify-write ("+r") */

    __ASM volatile(
        "kmsda32 %0, %1, %2"
        : "+r"(acc)
        : "r"(a), "r"(b)
    );

    return acc;
}
/* ===== Inline Function End for 4.15.1. KMSDA32 ===== */

/* ===== Inline Function Start for 4.15.2. KMSXDA32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
 * \brief KMSXDA32 (Saturating Signed Crossed Multiply Two Words & Add & Subtract)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KMSDA32 Rd, Rs1, Rs2
 * KMSXDA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 32-bit multiplications from the 32-bit element of two registers; and then
 * subtracts the two 64-bit results from a third register. The subtraction result may be saturated.
 * * KMSDA32: rd - top*top - bottom*bottom
 * * KMSXDA32: rd - top*bottom - bottom*top
 *
 * **Description**:\n
 * For the `KMSDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
 * element of Rs2 and multiplies the top 32-bit element of Rs1 with the top 32-bit element of Rs2.
 * For the `KMSXDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
 * element of Rs2 and multiplies the top 32-bit element of Rs1 with the bottom 32-bit element of Rs2.
 * The two 64-bit multiplication results are then subtracted from the content of Rd. If the subtraction
 * result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit
 * is set to 1.
The result after saturation is written to Rd. The 32-bit contents are treated as signed * integers. * * **Operations**:\n * ~~~ * res = Rd - (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // KMSDA32 * res = Rd - (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // KMSXDA32 * if (res > (2^63)-1) { * res = (2^63)-1; * OV = 1; * } else if (res < -2^63) { * res = -2^63; * OV = 1; * } * Rd = res; * ~~~ * * \param [in] t long type of value stored in t * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_KMSXDA32(long t, unsigned long a, unsigned long b) { __ASM volatile("kmsxda32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for 4.15.2. KMSXDA32 ===== */ /* ===== Inline Function Start for 4.16. KSLL32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT * \brief KSLL32 (SIMD 32-bit Saturating Shift Left Logical) * \details * **Type**: SIMD (RV64 Only) * * **Syntax**:\n * ~~~ * KSLL32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 32-bit elements logical left shift operations with saturation simultaneously. The shift * amount is a variable from a GPR. * * **Description**:\n * The 32-bit data elements in Rs1 are left-shifted logically. The shifted out bits are filled * with zero and the shift amount is specified by the low-order 5-bits of the value in the Rs2 register. * Any shifted value greater than 2^31-1 is saturated to 2^31-1. Any shifted value smaller than -2^31 is * saturated to -2^31. And the saturated results are written to Rd. If any saturation is performed, set OV * bit to 1. 
* * **Operations**:\n * ~~~ * sa = Rs2[4:0]; * if (sa != 0) { * res[(31+sa):0] = Rs1.W[x] << sa; * if (res > (2^31)-1) { * res = 0x7fffffff; OV = 1; * } else if (res < -2^31) { * res = 0x80000000; OV = 1; * } * Rd.W[x] = res[31:0]; * } else { * Rd = Rs1; * } * for RV64: x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_KSLL32(unsigned long a, unsigned int b) { unsigned long result; __ASM volatile("ksll32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.16. KSLL32 ===== */ /* ===== Inline Function Start for 4.17. KSLLI32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT * \brief KSLLI32 (SIMD 32-bit Saturating Shift Left Logical Immediate) * \details * **Type**: SIMD (RV64 Only) * * **Syntax**:\n * ~~~ * KSLLI32 Rd, Rs1, imm5u * ~~~ * * **Purpose**:\n * Do 32-bit elements logical left shift operations with saturation simultaneously. The shift * amount is an immediate value. * * **Description**:\n * The 32-bit data elements in Rs1 are left-shifted logically. The shifted out bits are filled * with zero and the shift amount is specified by the imm5u constant. Any shifted value greater than * 2^31-1 is saturated to 2^31-1. Any shifted value smaller than -2^31 is saturated to -2^31. And the saturated * results are written to Rd. If any saturation is performed, set OV bit to 1. 
* * **Operations**:\n * ~~~ * sa = imm5u[4:0]; * if (sa != 0) { * res[(31+sa):0] = Rs1.W[x] << sa; * if (res > (2^31)-1) { * res = 0x7fffffff; OV = 1; * } else if (res < -2^31) { * res = 0x80000000; OV = 1; * } * Rd.W[x] = res[31:0]; * } else { * Rd = Rs1; * } * for RV64: x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long type */ #define __RV_KSLLI32(a, b) \ ({ \ unsigned long result; \ unsigned long __a = (unsigned long)(a); \ __ASM volatile("kslli32 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ result; \ }) /* ===== Inline Function End for 4.17. KSLLI32 ===== */ /* ===== Inline Function Start for 4.18.1. KSLRA32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT * \brief KSLRA32 (SIMD 32-bit Shift Left Logical with Saturation or Shift Right Arithmetic) * \details * **Type**: SIMD (RV64 Only) * * **Syntax**:\n * ~~~ * KSLRA32 Rd, Rs1, Rs2 * KSLRA32.u Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 32-bit elements logical left (positive) or arithmetic right (negative) shift operation with * Q31 saturation for the left shift. The `.u` form performs additional rounding up operations for the * right shift. * * **Description**:\n * The 32-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically * based on the value of Rs2[5:0]. Rs2[5:0] is in the signed range of [-25, 25-1]. A positive Rs2[5:0] means * logical left shift and a negative Rs2[5:0] means arithmetic right shift. The shift amount is the * absolute value of Rs2[5:0]. However, the behavior of `Rs2[5:0]==-25 (0x20)` is defined to be * equivalent to the behavior of `Rs2[5:0]==-(25-1) (0x21)`. * The left-shifted results are saturated to the 32-bit signed integer range of [-2^31, 2^31-1]. For the `.u` * form of the instruction, the right-shifted results are added a 1 to the most significant discarded bit * position for rounding effect. 
After the shift, saturation, or rounding, the final results are written to * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:6] will not affect * this instruction. * * **Operations**:\n * ~~~ * if (Rs2[5:0] < 0) { * sa = -Rs2[5:0]; * sa = (sa == 32)? 31 : sa; * if (`.u` form) { * res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1; * Rd.W[x] = res[31:0]; * } else { * Rd.W[x] = SE32(Rs1.W[x][31:sa]); * } * } else { * sa = Rs2[4:0]; * res[(31+sa):0] = Rs1.W[x] <<(logic) sa; * if (res > (2^31)-1) { * res[31:0] = 0x7fffffff; OV = 1; * } else if (res < -2^31) { * res[31:0] = 0x80000000; OV = 1; * } * Rd.W[x] = res[31:0]; * } * for RV64: x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b int type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_KSLRA32(unsigned long a, int b) { unsigned long result; __ASM volatile("kslra32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.18.1. KSLRA32 ===== */ /* ===== Inline Function Start for 4.18.2. KSLRA32.u ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT * \brief KSLRA32.u (SIMD 32-bit Shift Left Logical with Saturation or Rounding Shift Right Arithmetic) * \details * **Type**: SIMD (RV64 Only) * * **Syntax**:\n * ~~~ * KSLRA32 Rd, Rs1, Rs2 * KSLRA32.u Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 32-bit elements logical left (positive) or arithmetic right (negative) shift operation with * Q31 saturation for the left shift. The `.u` form performs additional rounding up operations for the * right shift. * * **Description**:\n * The 32-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically * based on the value of Rs2[5:0]. Rs2[5:0] is in the signed range of [-25, 25-1]. A positive Rs2[5:0] means * logical left shift and a negative Rs2[5:0] means arithmetic right shift. 
The shift amount is the * absolute value of Rs2[5:0]. However, the behavior of `Rs2[5:0]==-25 (0x20)` is defined to be * equivalent to the behavior of `Rs2[5:0]==-(25-1) (0x21)`. * The left-shifted results are saturated to the 32-bit signed integer range of [-2^31, 2^31-1]. For the `.u` * form of the instruction, the right-shifted results are added a 1 to the most significant discarded bit * position for rounding effect. After the shift, saturation, or rounding, the final results are written to * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:6] will not affect * this instruction. * * **Operations**:\n * ~~~ * if (Rs2[5:0] < 0) { * sa = -Rs2[5:0]; * sa = (sa == 32)? 31 : sa; * if (`.u` form) { * res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1; * Rd.W[x] = res[31:0]; * } else { * Rd.W[x] = SE32(Rs1.W[x][31:sa]); * } * } else { * sa = Rs2[4:0]; * res[(31+sa):0] = Rs1.W[x] <<(logic) sa; * if (res > (2^31)-1) { * res[31:0] = 0x7fffffff; OV = 1; * } else if (res < -2^31) { * res[31:0] = 0x80000000; OV = 1; * } * Rd.W[x] = res[31:0]; * } * for RV64: x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b int type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_KSLRA32_U(unsigned long a, int b) { unsigned long result; __ASM volatile("kslra32.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.18.2. KSLRA32.u ===== */ /* ===== Inline Function Start for 4.19. KSTAS32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB * \brief KSTAS32 (SIMD 32-bit Signed Saturating Straight Addition & Subtraction) * \details * **Type**: SIMD (RV64 Only) * * **Syntax**:\n * ~~~ * KSTAS32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 32-bit signed integer element saturating addition and 32-bit signed integer element * saturating subtraction in a 64-bit chunk simultaneously. 
Operands are from corresponding 32-bit * elements. * * **Description**:\n * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit * integer element in [63:32] of Rs2; at the same time, it subtracts the 32-bit integer element in [31:0] of * Rs2 from the 32-bit integer element in [31:0] of Rs1. If any of the results are beyond the Q31 number * range (-2^31 <= Q31 <= 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated * results are written to [63:32] of Rd for addition and [31:0] of Rd for subtraction. * * **Operations**:\n * ~~~ * res[1] = Rs1.W[1] + Rs2.W[1]; * res[0] = Rs1.W[0] - Rs2.W[0]; * if (res[x] > (2^31)-1) { * res[x] = (2^31)-1; * OV = 1; * } else if (res < -2^31) { * res[x] = -2^31; * OV = 1; * } * Rd.W[1] = res[1]; * Rd.W[0] = res[0]; * for RV64, x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_KSTAS32(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("kstas32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.19. KSTAS32 ===== */ /* ===== Inline Function Start for 4.20. KSTSA32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB * \brief KSTSA32 (SIMD 32-bit Signed Saturating Straight Subtraction & Addition) * \details * **Type**: SIM (RV64 Only) * * **Syntax**:\n * ~~~ * KSTSA32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 32-bit signed integer element saturating subtraction and 32-bit signed integer element * saturating addition in a 64-bit chunk simultaneously. Operands are from corresponding 32-bit * elements. 
* *Description: * * This instruction subtracts the 32-bit integer element in [63:32] of Rs2 from the 32-bit integer * element in [63:32] of Rs1; at the same time, it adds the 32-bit integer element in [31:0] of Rs1 with * the 32-bit integer element in [31:0] of Rs2. If any of the results are beyond the Q31 number range ( * -2^31 <= Q31 <= 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated results are * written to [63:32] of Rd for subtraction and [31:0] of Rd for addition. * * **Operations**:\n * ~~~ * res[1] = Rs1.W[1] - Rs2.W[1]; * res[0] = Rs1.W[0] + Rs2.W[0]; * if (res[x] > (2^31)-1) { * res[x] = (2^31)-1; * OV = 1; * } else if (res < -2^31) { * res[x] = -2^31; * OV = 1; * } * Rd.W[1] = res[1]; * Rd.W[0] = res[0]; * for RV64, x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_KSTSA32(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("kstsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.20. KSTSA32 ===== */ /* ===== Inline Function Start for 4.21. KSUB32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB * \brief KSUB32 (SIMD 32-bit Signed Saturating Subtraction) * \details * **Type**: SIMD (RV64 Only) * * **Syntax**:\n * ~~~ * KSUB32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 32-bit signed integer elements saturating subtractions simultaneously. * * **Description**:\n * This instruction subtracts the 32-bit signed integer elements in Rs2 from the 32-bit * signed integer elements in Rs1. If any of the results are beyond the Q31 number range (-2^31 <= Q31 <= * 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to * Rd. 
* * **Operations**:\n * ~~~ * res[x] = Rs1.W[x] - Rs2.W[x]; * if (res[x] > (2^31)-1) { * res[x] = (2^31)-1; * OV = 1; * } else if (res[x] < -2^31) { * res[x] = -2^31; * OV = 1; * } * Rd.W[x] = res[x]; * for RV64: x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_KSUB32(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("ksub32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.21. KSUB32 ===== */ /* ===== Inline Function Start for 4.22.1. PKBB32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PACK * \brief PKBB32 (Pack Two 32-bit Data from Both Bottom Half) * \details * **Type**: DSP (RV64 Only) * * **Syntax**:\n * ~~~ * PKBB32 Rd, Rs1, Rs2 * PKBT32 Rd, Rs1, Rs2 * PKTT32 Rd, Rs1, Rs2 * PKTB32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Pack 32-bit data from 64-bit chunks in two registers. * * PKBB32: bottom.bottom * * PKBT32: bottom.top * * PKTT32: top.top * * PKTB32: top.bottom * * **Description**:\n * (PKBB32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0]. * (PKBT32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0]. * (PKTT32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0]. * (PKTB32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0]. 
* * **Operations**:\n * ~~~ * Rd = CONCAT(Rs1.W[_*0*_], Rs2.W[_*0*_]); // PKBB32 * Rd = CONCAT(Rs1.W[_*0*_], Rs2.W[_*1*_]); // PKBT32 * Rd = CONCAT(Rs1.W[_*1*_], Rs2.W[_*1*_]); // PKTT32 * Rd = CONCAT(Rs1.W[_*1*_], Rs2.W[_*0*_]); // PKTB32 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_PKBB32(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("pkbb32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.22.1. PKBB32 ===== */ /* ===== Inline Function Start for 4.22.2. PKBT32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PACK * \brief PKBT32 (Pack Two 32-bit Data from Bottom and Top Half) * \details * **Type**: DSP (RV64 Only) * * **Syntax**:\n * ~~~ * PKBB32 Rd, Rs1, Rs2 * PKBT32 Rd, Rs1, Rs2 * PKTT32 Rd, Rs1, Rs2 * PKTB32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Pack 32-bit data from 64-bit chunks in two registers. * * PKBB32: bottom.bottom * * PKBT32: bottom.top * * PKTT32: top.top * * PKTB32: top.bottom * * **Description**:\n * (PKBB32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0]. * (PKBT32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0]. * (PKTT32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0]. * (PKTB32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0]. 
* * **Operations**:\n * ~~~ * Rd = CONCAT(Rs1.W[_*0*_], Rs2.W[_*0*_]); // PKBB32 * Rd = CONCAT(Rs1.W[_*0*_], Rs2.W[_*1*_]); // PKBT32 * Rd = CONCAT(Rs1.W[_*1*_], Rs2.W[_*1*_]); // PKTT32 * Rd = CONCAT(Rs1.W[_*1*_], Rs2.W[_*0*_]); // PKTB32 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_PKBT32(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("pkbt32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.22.2. PKBT32 ===== */ /* ===== Inline Function Start for 4.22.3. PKTT32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PACK * \brief PKTT32 (Pack Two 32-bit Data from Both Top Half) * \details * **Type**: DSP (RV64 Only) * * **Syntax**:\n * ~~~ * PKBB32 Rd, Rs1, Rs2 * PKBT32 Rd, Rs1, Rs2 * PKTT32 Rd, Rs1, Rs2 * PKTB32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Pack 32-bit data from 64-bit chunks in two registers. * * PKBB32: bottom.bottom * * PKBT32: bottom.top * * PKTT32: top.top * * PKTB32: top.bottom * * **Description**:\n * (PKBB32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0]. * (PKBT32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0]. * (PKTT32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0]. * (PKTB32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0]. 
* * **Operations**:\n * ~~~ * Rd = CONCAT(Rs1.W[_*0*_], Rs2.W[_*0*_]); // PKBB32 * Rd = CONCAT(Rs1.W[_*0*_], Rs2.W[_*1*_]); // PKBT32 * Rd = CONCAT(Rs1.W[_*1*_], Rs2.W[_*1*_]); // PKTT32 * Rd = CONCAT(Rs1.W[_*1*_], Rs2.W[_*0*_]); // PKTB32 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_PKTT32(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("pktt32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.22.3. PKTT32 ===== */ /* ===== Inline Function Start for 4.22.4. PKTB32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PACK * \brief PKTB32 (Pack Two 32-bit Data from Top and Bottom Half) * \details * **Type**: DSP (RV64 Only) * * **Syntax**:\n * ~~~ * PKBB32 Rd, Rs1, Rs2 * PKBT32 Rd, Rs1, Rs2 * PKTT32 Rd, Rs1, Rs2 * PKTB32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Pack 32-bit data from 64-bit chunks in two registers. * * PKBB32: bottom.bottom * * PKBT32: bottom.top * * PKTT32: top.top * * PKTB32: top.bottom * * **Description**:\n * (PKBB32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0]. * (PKBT32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0]. * (PKTT32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0]. * (PKTB32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0]. 
* * **Operations**:\n * ~~~ * Rd = CONCAT(Rs1.W[_*0*_], Rs2.W[_*0*_]); // PKBB32 * Rd = CONCAT(Rs1.W[_*0*_], Rs2.W[_*1*_]); // PKBT32 * Rd = CONCAT(Rs1.W[_*1*_], Rs2.W[_*1*_]); // PKTT32 * Rd = CONCAT(Rs1.W[_*1*_], Rs2.W[_*0*_]); // PKTB32 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_PKTB32(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("pktb32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.22.4. PKTB32 ===== */ /* ===== Inline Function Start for 4.23. RADD32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB * \brief RADD32 (SIMD 32-bit Signed Halving Addition) * \details * **Type**: SIMD (RV64 Only) * * **Syntax**:\n * ~~~ * RADD32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 32-bit signed integer element additions simultaneously. The results are halved to avoid * overflow or saturation. * * **Description**:\n * This instruction adds the 32-bit signed integer elements in Rs1 with the 32-bit signed * integer elements in Rs2. The results are first arithmetically right-shifted by 1 bit and then written to * Rd. * * **Examples**:\n * ~~~ * * Rs1 = 0x7FFFFFFF, Rs2 = 0x7FFFFFFF Rd = 0x7FFFFFFF * * Rs1 = 0x80000000, Rs2 = 0x80000000 Rd = 0x80000000 * * Rs1 = 0x40000000, Rs2 = 0x80000000 Rd = 0xE0000000 * ~~~ * * **Operations**:\n * ~~~ * Rd.W[x] = (Rs1.W[x] + Rs2.W[x]) s>> 1; * for RV64: x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_RADD32(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("radd32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.23. 
RADD32 ===== */

/* ===== Inline Function Start for 4.24. RCRAS32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief RCRAS32 (SIMD 32-bit Signed Halving Cross Addition & Subtraction)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * RCRAS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer element addition and 32-bit signed integer element subtraction in
 * a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements. The results are halved to
 * avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the 32-bit signed integer element in [63:32] of Rs1 with the 32-bit
 * signed integer element in [31:0] of Rs2, and subtracts the 32-bit signed integer element in [63:32] of
 * Rs2 from the 32-bit signed integer element in [31:0] of Rs1. The element results are first
 * arithmetically right-shifted by 1 bit and then written to [63:32] of Rd for addition and [31:0] of Rd
 * for subtraction.
 *
 * **Examples**:\n
 * ~~~
 * Please see `RADD32` and `RSUB32` instructions.
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[1] = (Rs1.W[1] + Rs2.W[0]) s>> 1;
 * Rd.W[0] = (Rs1.W[0] - Rs2.W[1]) s>> 1;
 * ~~~
 *
 * \param [in] a unsigned long type of value stored in a (Rs1 operand)
 * \param [in] b unsigned long type of value stored in b (Rs2 operand)
 * \return value stored in unsigned long type (content of Rd)
 */
__STATIC_FORCEINLINE unsigned long __RV_RCRAS32(unsigned long a, unsigned long b)
{
    unsigned long rd; /* write-only output operand %0 */

    __ASM volatile(
        "rcras32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );

    return rd;
}
/* ===== Inline Function End for 4.24. RCRAS32 ===== */

/* ===== Inline Function Start for 4.25.
RCRSA32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief RCRSA32 (SIMD 32-bit Signed Halving Cross Subtraction & Addition)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * RCRSA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer element subtraction and 32-bit signed integer element addition in
 * a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements. The results are halved to
 * avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit signed integer element in [31:0] of Rs2 from the
 * 32-bit signed integer element in [63:32] of Rs1, and adds the 32-bit signed integer element in [31:0]
 * of Rs1 with the 32-bit signed integer element in [63:32] of Rs2. The two results are first
 * arithmetically right-shifted by 1 bit and then written to [63:32] of Rd for subtraction and [31:0] of
 * Rd for addition.
 *
 * **Examples**:\n
 * ~~~
 * Please see `RADD32` and `RSUB32` instructions.
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[1] = (Rs1.W[1] - Rs2.W[0]) s>> 1;
 * Rd.W[0] = (Rs1.W[0] + Rs2.W[1]) s>> 1;
 * ~~~
 *
 * \param [in] a unsigned long type of value stored in a (Rs1 operand)
 * \param [in] b unsigned long type of value stored in b (Rs2 operand)
 * \return value stored in unsigned long type (content of Rd)
 */
__STATIC_FORCEINLINE unsigned long __RV_RCRSA32(unsigned long a, unsigned long b)
{
    unsigned long rd; /* write-only output operand %0 */

    __ASM volatile(
        "rcrsa32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );

    return rd;
}
/* ===== Inline Function End for 4.25. RCRSA32 ===== */

/* ===== Inline Function Start for 4.26. RSTAS32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief RSTAS32 (SIMD 32-bit Signed Halving Straight Addition & Subtraction)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * RSTAS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer element addition and 32-bit signed integer element subtraction in
 * a 64-bit chunk simultaneously.
Operands are from corresponding 32-bit elements. The results are * halved to avoid overflow or saturation. * * **Description**:\n * This instruction adds the 32-bit signed integer element in [63:32] of Rs1 with the 32-bit * signed integer element in [63:32] of Rs2, and subtracts the 32-bit signed integer element in [31:0] of * Rs2 from the 32-bit signed integer element in [31:0] of Rs1. The element results are first * arithmetically right-shifted by 1 bit and then written to [63:32] of Rd for addition and [31:0] of Rd * for subtraction. * * **Examples**:\n * ~~~ * Please see `RADD32` and `RSUB32` instructions. * ~~~ * * **Operations**:\n * ~~~ * Rd.W[1] = (Rs1.W[1] + Rs2.W[1]) s>> 1; * Rd.W[0] = (Rs1.W[0] - Rs2.W[0]) s>> 1; * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_RSTAS32(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("rstas32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.26. RSTAS32 ===== */ /* ===== Inline Function Start for 4.27. RSTSA32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB * \brief RSTSA32 (SIMD 32-bit Signed Halving Straight Subtraction & Addition) * \details * **Type**: SIMD (RV64 Only) * * **Syntax**:\n * ~~~ * RSTSA32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 32-bit signed integer element subtraction and 32-bit signed integer element addition in * a 64-bit chunk simultaneously. Operands are from corresponding 32-bit elements. The results are * halved to avoid overflow or saturation. * * **Description**:\n * This instruction subtracts the 32-bit signed integer element in [63:32] of Rs2 from the * 32-bit signed integer element in [63:32] of Rs1, and adds the 32-bit signed element integer in [31:0] * of Rs1 with the 32-bit signed integer element in [31:0] of Rs2. 
The two results are first arithmetically * right-shifted by 1 bit and then written to [63:32] of Rd for subtraction and [31:0] of Rd for addition. * * **Examples**:\n * ~~~ * Please see `RADD32` and `RSUB32` instructions. * ~~~ * * **Operations**:\n * ~~~ * Rd.W[1] = (Rs1.W[1] - Rs2.W[1]) s>> 1; * Rd.W[0] = (Rs1.W[0] + Rs2.W[0]) s>> 1; * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_RSTSA32(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("rstsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.27. RSTSA32 ===== */ /* ===== Inline Function Start for 4.28. RSUB32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB * \brief RSUB32 (SIMD 32-bit Signed Halving Subtraction) * \details * **Type**: SIMD (RV64 Only) * * **Syntax**:\n * ~~~ * RSUB32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 32-bit signed integer element subtractions simultaneously. The results are halved to * avoid overflow or saturation. * * **Description**:\n * This instruction subtracts the 32-bit signed integer elements in Rs2 from the 32-bit * signed integer elements in Rs1. The results are first arithmetically right-shifted by 1 bit and then * written to Rd. 
* * **Examples**:\n * ~~~ * * Ra = 0x7FFFFFFF, Rb = 0x80000000 Rt = 0x7FFFFFFF * * Ra = 0x80000000, Rb = 0x7FFFFFFF Rt = 0x80000000 * * Ra = 0x80000000, Rb = 0x40000000 Rt = 0xA0000000 * ~~~ * * **Operations**:\n * ~~~ * Rd.W[x] = (Rs1.W[x] - Rs2.W[x]) s>> 1; * for RV64: x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_RSUB32(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("rsub32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.28. RSUB32 ===== */ /* ===== Inline Function Start for 4.29. SLL32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT * \brief SLL32 (SIMD 32-bit Shift Left Logical) * \details * **Type**: SIMD (RV64 Only) * * **Syntax**:\n * ~~~ * SLL32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 32-bit elements logical left shift operations simultaneously. The shift amount is a * variable from a GPR. * * **Description**:\n * The 32-bit elements in Rs1 are left-shifted logically. And the results are written to Rd. * The shifted out bits are filled with zero and the shift amount is specified by the low-order 5-bits of * the value in the Rs2 register. * * **Operations**:\n * ~~~ * sa = Rs2[4:0]; * Rd.W[x] = Rs1.W[x] << sa; * for RV64: x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_SLL32(unsigned long a, unsigned int b) { unsigned long result; __ASM volatile("sll32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.29. SLL32 ===== */ /* ===== Inline Function Start for 4.30. 
SLLI32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT * \brief SLLI32 (SIMD 32-bit Shift Left Logical Immediate) * \details * **Type**: SIMD (RV64 Only) * * **Syntax**:\n * ~~~ * SLLI32 Rd, Rs1, imm5u[4:0] * ~~~ * * **Purpose**:\n * Do 32-bit element logical left shift operations simultaneously. The shift amount is an * immediate value. * * **Description**:\n * The 32-bit elements in Rs1 are left-shifted logically. The shifted out bits are filled with * zero and the shift amount is specified by the imm5u[4:0] constant. And the results are written to Rd. * * **Operations**:\n * ~~~ * sa = imm5u[4:0]; * Rd.W[x] = Rs1.W[x] << sa; * for RV64: x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long type */ #define __RV_SLLI32(a, b) \ ({ \ unsigned long result; \ unsigned long __a = (unsigned long)(a); \ __ASM volatile("slli32 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ result; \ }) /* ===== Inline Function End for 4.30. SLLI32 ===== */ /* ===== Inline Function Start for 4.31. SMAX32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC * \brief SMAX32 (SIMD 32-bit Signed Maximum) * \details * **Type**: SIMD (RV64 Only) * * **Syntax**:\n * ~~~ * SMAX32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 32-bit signed integer elements finding maximum operations simultaneously. * * **Description**:\n * This instruction compares the 32-bit signed integer elements in Rs1 with the 32-bit * signed integer elements in Rs2 and selects the numbers that is greater than the other one. The * selected results are written to Rd. * * **Operations**:\n * ~~~ * Rd.W[x] = (Rs1.W[x] > Rs2.W[x])? 
Rs1.W[x] : Rs2.W[x]; * for RV64: x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_SMAX32(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("smax32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.31. SMAX32 ===== */ /* ===== Inline Function Start for 4.32.1. SMBB32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT * \brief SMBB32 (Signed Multiply Bottom Word & Bottom Word) * \details * **Type**: DSP (RV64 Only) * * **Syntax**:\n * ~~~ * SMBB32 Rd, Rs1, Rs2 * SMBT32 Rd, Rs1, Rs2 * SMTT32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the signed 32-bit element of a register with the signed 32-bit element of another * register and write the 64-bit result to a third register. * * SMBB32: bottom*bottom * * SMBT32: bottom*top * * SMTT32: top*top * * **Description**:\n * For the `SMBB32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit * element of Rs2. It is actually an alias of `MULSR64` instruction. * For the `SMBT32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit * element of Rs2. * For the `SMTT32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit element * of Rs2. * The 64-bit multiplication result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as * signed integers. 
* * **Operations**:\n * ~~~ * res = Rs1.W[0] * Rs2.W[0]; // SMBB32 res = Rs1.W[0] * Rs2.w[1]; // SMBT32 res = Rs1.W[1] * Rs2.W[1]; * // SMTT32 Rd = res; * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_SMBB32(unsigned long a, unsigned long b) { long result; __ASM volatile("smbb32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.32.1. SMBB32 ===== */ /* ===== Inline Function Start for 4.32.2. SMBT32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT * \brief SMBT32 (Signed Multiply Bottom Word & Top Word) * \details * **Type**: DSP (RV64 Only) * * **Syntax**:\n * ~~~ * SMBB32 Rd, Rs1, Rs2 * SMBT32 Rd, Rs1, Rs2 * SMTT32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the signed 32-bit element of a register with the signed 32-bit element of another * register and write the 64-bit result to a third register. * * SMBB32: bottom*bottom * * SMBT32: bottom*top * * SMTT32: top*top * * **Description**:\n * For the `SMBB32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit * element of Rs2. It is actually an alias of `MULSR64` instruction. * For the `SMBT32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit * element of Rs2. * For the `SMTT32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit element * of Rs2. * The 64-bit multiplication result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as * signed integers. 
* * **Operations**:\n * ~~~ * res = Rs1.W[0] * Rs2.W[0]; // SMBB32 res = Rs1.W[0] * Rs2.w[1]; // SMBT32 res = Rs1.W[1] * Rs2.W[1]; * // SMTT32 Rd = res; * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_SMBT32(unsigned long a, unsigned long b) { long result; __ASM volatile("smbt32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.32.2. SMBT32 ===== */ /* ===== Inline Function Start for 4.32.3. SMTT32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT * \brief SMTT32 (Signed Multiply Top Word & Top Word) * \details * **Type**: DSP (RV64 Only) * * **Syntax**:\n * ~~~ * SMBB32 Rd, Rs1, Rs2 * SMBT32 Rd, Rs1, Rs2 * SMTT32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the signed 32-bit element of a register with the signed 32-bit element of another * register and write the 64-bit result to a third register. * * SMBB32: bottom*bottom * * SMBT32: bottom*top * * SMTT32: top*top * * **Description**:\n * For the `SMBB32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit * element of Rs2. It is actually an alias of `MULSR64` instruction. * For the `SMBT32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit * element of Rs2. * For the `SMTT32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit element * of Rs2. * The 64-bit multiplication result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as * signed integers. 
* * **Operations**:\n * ~~~ * res = Rs1.W[0] * Rs2.W[0]; // SMBB32 res = Rs1.W[0] * Rs2.w[1]; // SMBT32 res = Rs1.W[1] * Rs2.W[1]; * // SMTT32 Rd = res; * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_SMTT32(unsigned long a, unsigned long b) { long result; __ASM volatile("smtt32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.32.3. SMTT32 ===== */ /* ===== Inline Function Start for 4.33.1. SMDS32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC * \brief SMDS32 (Signed Multiply Two Words and Subtract) * \details * **Type**: DSP (RV64 Only) * * **Syntax**:\n * ~~~ * SMDS32 Rd, Rs1, Rs2 * SMDRS32 Rd, Rs1, Rs2 * SMXDS32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two signed 32-bit multiplications from the l 32-bit element of two registers; and then * perform a subtraction operation between the two 64-bit results. * * SMDS32: top*top - bottom*bottom * * SMDRS32: bottom*bottom - top*top * * SMXDS32: top*bottom - bottom*top * * **Description**:\n * For the `SMDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of * Rs1 with the top 32-bit element of Rs2. * For the `SMDRS32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit * element of Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit * element of Rs1 with the bottom 32-bit element of Rs2. * For the `SMXDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of * Rs1 with the bottom 32-bit element of Rs2. * The subtraction result is written to Rd. 
The 32-bit contents of Rs1 and Rs2 are treated as signed * integers. * * **Operations**:\n * ~~~ * Rt = (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // SMDS32 * Rt = (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // SMDRS32 * Rt = (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // SMXDS32 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_SMDS32(unsigned long a, unsigned long b) { long result; __ASM volatile("smds32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.33.1. SMDS32 ===== */ /* ===== Inline Function Start for 4.33.2. SMDRS32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC * \brief SMDRS32 (Signed Multiply Two Words and Reverse Subtract) * \details * **Type**: DSP (RV64 Only) * * **Syntax**:\n * ~~~ * SMDS32 Rd, Rs1, Rs2 * SMDRS32 Rd, Rs1, Rs2 * SMXDS32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two signed 32-bit multiplications from the l 32-bit element of two registers; and then * perform a subtraction operation between the two 64-bit results. * * SMDS32: top*top - bottom*bottom * * SMDRS32: bottom*bottom - top*top * * SMXDS32: top*bottom - bottom*top * * **Description**:\n * For the `SMDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of * Rs1 with the top 32-bit element of Rs2. * For the `SMDRS32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit * element of Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit * element of Rs1 with the bottom 32-bit element of Rs2. 
* For the `SMXDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of * Rs1 with the bottom 32-bit element of Rs2. * The subtraction result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed * integers. * * **Operations**:\n * ~~~ * Rt = (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // SMDS32 * Rt = (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // SMDRS32 * Rt = (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // SMXDS32 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_SMDRS32(unsigned long a, unsigned long b) { long result; __ASM volatile("smdrs32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.33.2. SMDRS32 ===== */ /* ===== Inline Function Start for 4.33.3. SMXDS32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC * \brief SMXDS32 (Signed Crossed Multiply Two Words and Subtract) * \details * **Type**: DSP (RV64 Only) * * **Syntax**:\n * ~~~ * SMDS32 Rd, Rs1, Rs2 * SMDRS32 Rd, Rs1, Rs2 * SMXDS32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two signed 32-bit multiplications from the l 32-bit element of two registers; and then * perform a subtraction operation between the two 64-bit results. * * SMDS32: top*top - bottom*bottom * * SMDRS32: bottom*bottom - top*top * * SMXDS32: top*bottom - bottom*top * * **Description**:\n * For the `SMDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of * Rs1 with the top 32-bit element of Rs2. 
* For the `SMDRS32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit * element of Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit * element of Rs1 with the bottom 32-bit element of Rs2. * For the `SMXDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of * Rs1 with the bottom 32-bit element of Rs2. * The subtraction result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed * integers. * * **Operations**:\n * ~~~ * Rt = (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // SMDS32 * Rt = (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // SMDRS32 * Rt = (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // SMXDS32 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_SMXDS32(unsigned long a, unsigned long b) { long result; __ASM volatile("smxds32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.33.3. SMXDS32 ===== */ /* ===== Inline Function Start for 4.34. SMIN32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC * \brief SMIN32 (SIMD 32-bit Signed Minimum) * \details * **Type**: SIMD (RV64 Only) * * **Syntax**:\n * ~~~ * SMIN32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 32-bit signed integer elements finding minimum operations simultaneously. * * **Description**:\n * This instruction compares the 32-bit signed integer elements in Rs1 with the 32-bit * signed integer elements in Rs2 and selects the numbers that is less than the other one. The selected * results are written to Rd. * * **Operations**:\n * ~~~ * Rd.W[x] = (Rs1.W[x] < Rs2.W[x])? 
Rs1.W[x] : Rs2.W[x]; * for RV64: x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_SMIN32(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("smin32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.34. SMIN32 ===== */ /* ===== Inline Function Start for 4.35.1. SRA32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT * \brief SRA32 (SIMD 32-bit Shift Right Arithmetic) * \details * **Type**: SIMD (RV64 Only) * * **Syntax**:\n * ~~~ * SRA32 Rd, Rs1, Rs2 * SRA32.u Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 32-bit element arithmetic right shift operations simultaneously. The shift amount is a * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted * results. * * **Description**:\n * The 32-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order * 5-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is * added to the most significant discarded bit of each 32-bit data element to calculate the final results. * And the results are written to Rd. 
* * **Operations**:\n * ~~~ * sa = Rs2[4:0]; * if (sa > 0) { * if (`.u` form) { // SRA32.u * res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1; * Rd.W[x] = res[31:0]; * else { // SRA32 * Rd.W[x] = SE32(Rs1.W[x][31:sa]) * } * } else { * Rd = Rs1; * } * for RV64: x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_SRA32(unsigned long a, unsigned int b) { unsigned long result; __ASM volatile("sra32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.35.1. SRA32 ===== */ /* ===== Inline Function Start for 4.35.2. SRA32.u ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT * \brief SRA32.u (SIMD 32-bit Rounding Shift Right Arithmetic) * \details * **Type**: SIMD (RV64 Only) * * **Syntax**:\n * ~~~ * SRA32 Rd, Rs1, Rs2 * SRA32.u Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 32-bit element arithmetic right shift operations simultaneously. The shift amount is a * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted * results. * * **Description**:\n * The 32-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order * 5-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is * added to the most significant discarded bit of each 32-bit data element to calculate the final results. * And the results are written to Rd. 
* * **Operations**:\n * ~~~ * sa = Rs2[4:0]; * if (sa > 0) { * if (`.u` form) { // SRA32.u * res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1; * Rd.W[x] = res[31:0]; * else { // SRA32 * Rd.W[x] = SE32(Rs1.W[x][31:sa]) * } * } else { * Rd = Rs1; * } * for RV64: x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_SRA32_U(unsigned long a, unsigned int b) { unsigned long result; __ASM volatile("sra32.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.35.2. SRA32.u ===== */ /* ===== Inline Function Start for 4.36.1. SRAI32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT * \brief SRAI32 (SIMD 32-bit Shift Right Arithmetic Immediate) * \details * **Type**: DSP (RV64 Only) * * **Syntax**:\n * ~~~ * SRAI32 Rd, Rs1, imm5u * SRAI32.u Rd, Rs1, imm5u * ~~~ * * **Purpose**:\n * Do 32-bit elements arithmetic right shift operations simultaneously. The shift amount is * an immediate value. The `.u` form performs additional rounding up operations on the shifted * results. * * **Description**:\n * The 32-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out * bits are filled with the sign-bit of the 32-bit data elements. The shift amount is specified by the * imm5u constant. For the rounding operation of the `.u` form, a value of 1 is added to the most * significant discarded bit of each 32-bit data to calculate the final results. And the results are written * to Rd. 
* * **Operations**:\n * ~~~ * sa = imm5u[4:0]; * if (sa > 0) { * if (`.u` form) { // SRAI32.u * res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1; * Rd.W[x] = res[31:0]; * else { // SRAI32 * Rd.W[x] = SE32(Rs1.W[x][31:sa]); * } * } else { * Rd = Rs1; * } * for RV64: x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long type */ #define __RV_SRAI32(a, b) \ ({ \ unsigned long result; \ unsigned long __a = (unsigned long)(a); \ __ASM volatile("srai32 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ result; \ }) /* ===== Inline Function End for 4.36.1. SRAI32 ===== */ /* ===== Inline Function Start for 4.36.2. SRAI32.u ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT * \brief SRAI32.u (SIMD 32-bit Rounding Shift Right Arithmetic Immediate) * \details * **Type**: DSP (RV64 Only) * * **Syntax**:\n * ~~~ * SRAI32 Rd, Rs1, imm5u * SRAI32.u Rd, Rs1, imm5u * ~~~ * * **Purpose**:\n * Do 32-bit elements arithmetic right shift operations simultaneously. The shift amount is * an immediate value. The `.u` form performs additional rounding up operations on the shifted * results. * * **Description**:\n * The 32-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out * bits are filled with the sign-bit of the 32-bit data elements. The shift amount is specified by the * imm5u constant. For the rounding operation of the `.u` form, a value of 1 is added to the most * significant discarded bit of each 32-bit data to calculate the final results. And the results are written * to Rd. 
* * **Operations**:\n * ~~~ * sa = imm5u[4:0]; * if (sa > 0) { * if (`.u` form) { // SRAI32.u * res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1; * Rd.W[x] = res[31:0]; * else { // SRAI32 * Rd.W[x] = SE32(Rs1.W[x][31:sa]); * } * } else { * Rd = Rs1; * } * for RV64: x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long type */ #define __RV_SRAI32_U(a, b) \ ({ \ unsigned long result; \ unsigned long __a = (unsigned long)(a); \ __ASM volatile("srai32.u %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ result; \ }) /* ===== Inline Function End for 4.36.2. SRAI32.u ===== */ /* ===== Inline Function Start for 4.37. SRAIW.u ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_NON_SIMD_32B_SHIFT * \brief SRAIW.u (Rounding Shift Right Arithmetic Immediate Word) * \details * **Type**: DSP (RV64 only) * * **Syntax**:\n * ~~~ * SRAIW.u Rd, Rs1, imm5u * ~~~ * * **Purpose**:\n * Perform a 32-bit arithmetic right shift operation with rounding. The shift amount is an * immediate value. * * **Description**:\n * This instruction right-shifts the lower 32-bit content of Rs1 arithmetically. The shifted * out bits are filled with the sign-bit Rs1(31) and the shift amount is specified by the imm5u constant. * For the rounding operation, a value of 1 is added to the most significant discarded bit of the data to * calculate the final result. And the result is sign-extended and written to Rd. 
* * **Operations**:\n * ~~~ * sa = imm5u; * if (sa != 0) { * res[31:-1] = SE33(Rs1[31:(sa-1)]) + 1; * Rd = SE32(res[31:0]); * } else { * Rd = SE32(Rs1.W[0]); * } * ~~~ * * \param [in] a int type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in long type */ #define __RV_SRAIW_U(a, b) \ ({ \ long result; \ int __a = (int)(a); \ __ASM volatile("sraiw.u %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ result; \ }) /* ===== Inline Function End for 4.37. SRAIW.u ===== */ /* ===== Inline Function Start for 4.38.1. SRL32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT * \brief SRL32 (SIMD 32-bit Shift Right Logical) * \details * **Type**: SIMD (RV64 Only) * * **Syntax**:\n * ~~~ * SRL32 Rd, Rs1, Rs2 * SRL32.u Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 32-bit element logical right shift operations simultaneously. The shift amount is a * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted * results. * * **Description**:\n * The 32-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits * are filled with zero. The shift amount is specified by the low-order 5-bits of the value in the Rs2 * register. For the rounding operation of the `.u` form, a value of 1 is added to the most significant * discarded bit of each 32-bit data element to calculate the final results. And the results are written to * Rd. 
* * **Operations**:\n * ~~~ * sa = Rs2[4:0]; * if (sa > 0) { * if (`.u` form) { // SRA32.u * res[31:-1] = ZE33(Rs1.W[x][31:sa-1]) + 1; * Rd.W[x] = res[31:0]; * else { // SRA32 * Rd.W[x] = ZE32(Rs1.W[x][31:sa]) * } * } else { * Rd = Rs1; * } * for RV64: x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_SRL32(unsigned long a, unsigned int b) { unsigned long result; __ASM volatile("srl32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.38.1. SRL32 ===== */ /* ===== Inline Function Start for 4.38.2. SRL32.u ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT * \brief SRL32.u (SIMD 32-bit Rounding Shift Right Logical) * \details * **Type**: SIMD (RV64 Only) * * **Syntax**:\n * ~~~ * SRL32 Rd, Rs1, Rs2 * SRL32.u Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 32-bit element logical right shift operations simultaneously. The shift amount is a * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted * results. * * **Description**:\n * The 32-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits * are filled with zero. The shift amount is specified by the low-order 5-bits of the value in the Rs2 * register. For the rounding operation of the `.u` form, a value of 1 is added to the most significant * discarded bit of each 32-bit data element to calculate the final results. And the results are written to * Rd. 
* * **Operations**:\n * ~~~ * sa = Rs2[4:0]; * if (sa > 0) { * if (`.u` form) { // SRA32.u * res[31:-1] = ZE33(Rs1.W[x][31:sa-1]) + 1; * Rd.W[x] = res[31:0]; * else { // SRA32 * Rd.W[x] = ZE32(Rs1.W[x][31:sa]) * } * } else { * Rd = Rs1; * } * for RV64: x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_SRL32_U(unsigned long a, unsigned int b) { unsigned long result; __ASM volatile("srl32.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.38.2. SRL32.u ===== */ /* ===== Inline Function Start for 4.39.1. SRLI32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT * \brief SRLI32 (SIMD 32-bit Shift Right Logical Immediate) * \details * **Type**: SIMD (RV64 Only) * * **Syntax**:\n * ~~~ * SRLI32 Rd, Rs1, imm5u * SRLI32.u Rd, Rs1, imm5u * ~~~ * * **Purpose**:\n * Do 32-bit elements logical right shift operations simultaneously. The shift amount is an * immediate value. The `.u` form performs additional rounding up operations on the shifted results. * * **Description**:\n * The 32-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits * are filled with zero. The shift amount is specified by the imm5u constant. For the rounding * operation of the `.u` form, a value of 1 is added to the most significant discarded bit of each 32-bit * data to calculate the final results. And the results are written to Rd. 
* * **Operations**:\n * ~~~ * sa = imm5u[4:0]; * if (sa > 0) { * if (`.u` form) { // SRLI32.u * res[31:-1] = ZE33(Rs1.W[x][31:sa-1]) + 1; * Rd.W[x] = res[31:0]; * else { // SRLI32 * Rd.W[x] = ZE32(Rs1.W[x][31:sa]); * } * } else { * Rd = Rs1; * } * for RV64: x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long type */ #define __RV_SRLI32(a, b) \ ({ \ unsigned long result; \ unsigned long __a = (unsigned long)(a); \ __ASM volatile("srli32 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ result; \ }) /* ===== Inline Function End for 4.39.1. SRLI32 ===== */ /* ===== Inline Function Start for 4.39.2. SRLI32.u ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT * \brief SRLI32.u (SIMD 32-bit Rounding Shift Right Logical Immediate) * \details * **Type**: SIMD (RV64 Only) * * **Syntax**:\n * ~~~ * SRLI32 Rd, Rs1, imm5u * SRLI32.u Rd, Rs1, imm5u * ~~~ * * **Purpose**:\n * Do 32-bit elements logical right shift operations simultaneously. The shift amount is an * immediate value. The `.u` form performs additional rounding up operations on the shifted results. * * **Description**:\n * The 32-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits * are filled with zero. The shift amount is specified by the imm5u constant. For the rounding * operation of the `.u` form, a value of 1 is added to the most significant discarded bit of each 32-bit * data to calculate the final results. And the results are written to Rd. 
* * **Operations**:\n * ~~~ * sa = imm5u[4:0]; * if (sa > 0) { * if (`.u` form) { // SRLI32.u * res[31:-1] = ZE33(Rs1.W[x][31:sa-1]) + 1; * Rd.W[x] = res[31:0]; * else { // SRLI32 * Rd.W[x] = ZE32(Rs1.W[x][31:sa]); * } * } else { * Rd = Rs1; * } * for RV64: x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned int type of value stored in b * \return value stored in unsigned long type */ #define __RV_SRLI32_U(a, b) \ ({ \ unsigned long result; \ unsigned long __a = (unsigned long)(a); \ __ASM volatile("srli32.u %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ result; \ }) /* ===== Inline Function End for 4.39.2. SRLI32.u ===== */ /* ===== Inline Function Start for 4.40. STAS32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB * \brief STAS32 (SIMD 32-bit Straight Addition & Subtraction) * \details * **Type**: SIMD (RV64 Only) * * **Syntax**:\n * ~~~ * STAS32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 32-bit integer element addition and 32-bit integer element subtraction in a 64-bit * chunk simultaneously. Operands are from corresponding 32-bit elements. * * **Description**:\n * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit * integer element in [63:32] of Rs2, and writes the result to [63:32] of Rd; at the same time, it subtracts * the 32-bit integer element in [31:0] of Rs2 from the 32-bit integer element in [31:0] of Rs1, and * writes the result to [31:0] of Rd. * * **Note**:\n * This instruction can be used for either signed or unsigned operations. 
* * **Operations**:\n * ~~~ * Rd.W[1] = Rs1.W[1] + Rs2.W[1]; * Rd.W[0] = Rs1.W[0] - Rs2.W[0]; * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_STAS32(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("stas32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.40. STAS32 ===== */ /* ===== Inline Function Start for 4.41. STSA32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB * \brief STSA32 (SIMD 32-bit Straight Subtraction & Addition) * \details * **Type**: SIMD (RV64 Only) * * **Syntax**:\n * ~~~ * STSA32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 32-bit integer element subtraction and 32-bit integer element addition in a 64-bit * chunk simultaneously. Operands are from corresponding 32-bit elements. * *Description: * * This instruction subtracts the 32-bit integer element in [63:32] of Rs2 from the 32-bit integer * element in [63:32] of Rs1, and writes the result to [63:32] of Rd; at the same time, it adds the 32-bit * integer element in [31:0] of Rs1 with the 32-bit integer element in [31:0] of Rs2, and writes the result * to [31:0] of Rd * * **Note**:\n * This instruction can be used for either signed or unsigned operations. * * **Operations**:\n * ~~~ * Rd.W[1] = Rs1.W[1] - Rs2.W[1]; * Rd.W[0] = Rs1.W[0] + Rs2.W[0]; * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_STSA32(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("stsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.41. STSA32 ===== */ /* ===== Inline Function Start for 4.42. 
SUB32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief SUB32 (SIMD 32-bit Subtraction)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * SUB32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit integer element subtractions simultaneously.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit integer elements in Rs2 from the 32-bit integer
 * elements in Rs1, and then writes the results to Rd.
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned subtraction.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = Rs1.W[x] - Rs2.W[x];
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in] a unsigned long type of value stored in a
 * \param [in] b unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SUB32(unsigned long a, unsigned long b)
{
    unsigned long rd;

    __ASM volatile("sub32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 4.42. SUB32 ===== */

/* ===== Inline Function Start for 4.43. UKADD32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief UKADD32 (SIMD 32-bit Unsigned Saturating Addition)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * UKADD32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit unsigned integer element saturating additions simultaneously.
 *
 * **Description**:\n
 * This instruction adds the 32-bit unsigned integer elements in Rs1 with the 32-bit
 * unsigned integer elements in Rs2. If any of the results are beyond the 32-bit unsigned number
 * range (0 <= RES <= 2^32-1), they are saturated to the range and the OV bit is set to 1. The saturated
 * results are written to Rd.
* * **Operations**:\n * ~~~ * res[x] = Rs1.W[x] + Rs2.W[x]; * if (res[x] > (2^32)-1) { * res[x] = (2^32)-1; * OV = 1; * } * Rd.W[x] = res[x]; * for RV64: x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_UKADD32(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("ukadd32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.43. UKADD32 ===== */ /* ===== Inline Function Start for 4.44. UKCRAS32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB * \brief UKCRAS32 (SIMD 32-bit Unsigned Saturating Cross Addition & Subtraction) * \details * **Type**: SIMD (RV64 Only) * * **Syntax**:\n * ~~~ * UKCRAS32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do one 32-bit unsigned integer element saturating addition and one 32-bit unsigned * integer element saturating subtraction in a 64-bit chunk simultaneously. Operands are from crossed * 32-bit elements. * * **Description**:\n * This instruction adds the 32-bit unsigned integer element in [63:32] of Rs1 with the 32- * bit unsigned integer element in [31:0] of Rs2; at the same time, it subtracts the 32-bit unsigned * integer element in [63:32] of Rs2 from the 32-bit unsigned integer element in [31:0] Rs1. If any of the * results are beyond the 32-bit unsigned number range (0 <= RES <= 2^32-1), they are saturated to the * range and the OV bit is set to 1. The saturated results are written to [63:32] of Rd for addition and * [31:0] of Rd for subtraction. 
* * **Operations**:\n * ~~~ * res1 = Rs1.W[1] + Rs2.W[0]; * res2 = Rs1.W[0] - Rs2.W[1]; * if (res1 > (2^32)-1) { * res1 = (2^32)-1; * OV = 1; * } * if (res2 < 0) { * res2 = 0; * OV = 1; * } * Rd.W[1] = res1; * Rd.W[0] = res2; * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_UKCRAS32(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("ukcras32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.44. UKCRAS32 ===== */ /* ===== Inline Function Start for 4.45. UKCRSA32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB * \brief UKCRSA32 (SIMD 32-bit Unsigned Saturating Cross Subtraction & Addition) * \details * **Type**: SIMD (RV64 Only) * * **Syntax**:\n * ~~~ * UKCRSA32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do one 32-bit unsigned integer element saturating subtraction and one 32-bit unsigned * integer element saturating addition in a 64-bit chunk simultaneously. Operands are from crossed * 32-bit elements. * * **Description**:\n * This instruction subtracts the 32-bit unsigned integer element in [31:0] of Rs2 from the * 32-bit unsigned integer element in [63:32] of Rs1; at the same time, it adds the 32-bit unsigned * integer element in [63:32] of Rs2 with the 32-bit unsigned integer element in [31:0] Rs1. If any of the * results are beyond the 32-bit unsigned number range (0 <= RES <= 2^32-1), they are saturated to the * range and the OV bit is set to 1. The saturated results are written to [63:32] of Rd for subtraction and * [31:0] of Rd for addition. 
* * **Operations**:\n * ~~~ * res1 = Rs1.W[1] - Rs2.W[0]; * res2 = Rs1.W[0] + Rs2.W[1]; * if (res1 < 0) { * res1 = 0; * OV = 1; * } else if (res2 > (2^32)-1) { * res2 = (2^32)-1; * OV = 1; * } * Rd.W[1] = res1; * Rd.W[0] = res2; * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_UKCRSA32(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("ukcrsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.45. UKCRSA32 ===== */ /* ===== Inline Function Start for 4.46. UKSTAS32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB * \brief UKSTAS32 (SIMD 32-bit Unsigned Saturating Straight Addition & Subtraction) * \details * **Type**: SIMD (RV64 Only) * * **Syntax**:\n * ~~~ * UKSTAS32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do one 32-bit unsigned integer element saturating addition and one 32-bit unsigned * integer element saturating subtraction in a 64-bit chunk simultaneously. Operands are from * corresponding 32-bit elements. * * **Description**:\n * This instruction adds the 32-bit unsigned integer element in [63:32] of Rs1 with the 32- * bit unsigned integer element in [63:32] of Rs2; at the same time, it subtracts the 32-bit unsigned * integer element in [31:0] of Rs2 from the 32-bit unsigned integer element in [31:0] Rs1. If any of the * results are beyond the 32-bit unsigned number range (0 <= RES <= 2^32-1), they are saturated to the * range and the OV bit is set to 1. The saturated results are written to [63:32] of Rd for addition and * [31:0] of Rd for subtraction. 
* * **Operations**:\n * ~~~ * res1 = Rs1.W[1] + Rs2.W[1]; * res2 = Rs1.W[0] - Rs2.W[0]; * if (res1 > (2^32)-1) { * res1 = (2^32)-1; * OV = 1; * } * if (res2 < 0) { * res2 = 0; * OV = 1; * } * Rd.W[1] = res1; * Rd.W[0] = res2; * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_UKSTAS32(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("ukstas32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.46. UKSTAS32 ===== */ /* ===== Inline Function Start for 4.47. UKSTSA32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB * \brief UKSTSA32 (SIMD 32-bit Unsigned Saturating Straight Subtraction & Addition) * \details * **Type**: SIMD (RV64 Only) * * **Syntax**:\n * ~~~ * UKSTSA32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do one 32-bit unsigned integer element saturating subtraction and one 32-bit unsigned * integer element saturating addition in a 64-bit chunk simultaneously. Operands are from * corresponding 32-bit elements. * * **Description**:\n * This instruction subtracts the 32-bit unsigned integer element in [63:32] of Rs2 from * the 32-bit unsigned integer element in [63:32] of Rs1; at the same time, it adds the 32-bit unsigned * integer element in [31:0] of Rs2 with the 32-bit unsigned integer element in [31:0] Rs1. If any of the * results are beyond the 32-bit unsigned number range (0 <= RES <= 2^32-1), they are saturated to the * range and the OV bit is set to 1. The saturated results are written to [63:32] of Rd for subtraction and * [31:0] of Rd for addition. 
* * **Operations**:\n * ~~~ * res1 = Rs1.W[1] - Rs2.W[1]; * res2 = Rs1.W[0] + Rs2.W[0]; * if (res1 < 0) { * res1 = 0; * OV = 1; * } else if (res2 > (2^32)-1) { * res2 = (2^32)-1; * OV = 1; * } * Rd.W[1] = res1; * Rd.W[0] = res2; * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_UKSTSA32(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("ukstsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.47. UKSTSA32 ===== */ /* ===== Inline Function Start for 4.48. UKSUB32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB * \brief UKSUB32 (SIMD 32-bit Unsigned Saturating Subtraction) * \details * **Type**: SIMD (RV64 Only) * * **Syntax**:\n * ~~~ * UKSUB32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 32-bit unsigned integer elements saturating subtractions simultaneously. * * **Description**:\n * This instruction subtracts the 32-bit unsigned integer elements in Rs2 from the 32-bit * unsigned integer elements in Rs1. If any of the results are beyond the 32-bit unsigned number * range (0 <= RES <= 2^32-1), they are saturated to the range and the OV bit is set to 1. The saturated * results are written to Rd. * * **Operations**:\n * ~~~ * res[x] = Rs1.W[x] - Rs2.W[x]; * if (res[x] < 0) { * res[x] = 0; * OV = 1; * } * Rd.W[x] = res[x]; * for RV64: x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_UKSUB32(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("uksub32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.48. UKSUB32 ===== */ /* ===== Inline Function Start for 4.49. 
UMAX32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC
 * \brief UMAX32 (SIMD 32-bit Unsigned Maximum)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * UMAX32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit unsigned integer elements finding maximum operations simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 32-bit unsigned integer elements in Rs1 with the 32-bit
 * unsigned integer elements in Rs2 and selects the number that is greater than the other one. The
 * selected results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = (Rs1.W[x] u> Rs2.W[x])? Rs1.W[x] : Rs2.W[x];
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in] a unsigned long type of value stored in a
 * \param [in] b unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UMAX32(unsigned long a, unsigned long b)
{
    unsigned long rd;

    __ASM volatile("umax32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 4.49. UMAX32 ===== */

/* ===== Inline Function Start for 4.50. UMIN32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC
 * \brief UMIN32 (SIMD 32-bit Unsigned Minimum)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * UMIN32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit unsigned integer elements finding minimum operations simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 32-bit unsigned integer elements in Rs1 with the 32-bit
 * unsigned integer elements in Rs2 and selects the number that is less than the other one. The
 * selected results are written to Rd.
* * **Operations**:\n * ~~~ * Rd.W[x] = (Rs1.W[x] > 1; * for RV64: x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_URADD32(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("uradd32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.51. URADD32 ===== */ /* ===== Inline Function Start for 4.52. URCRAS32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB * \brief URCRAS32 (SIMD 32-bit Unsigned Halving Cross Addition & Subtraction) * \details * **Type**: SIMD (RV64 Only) * * **Syntax**:\n * ~~~ * URCRAS32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 32-bit unsigned integer element addition and 32-bit unsigned integer element * subtraction in a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements. The * results are halved to avoid overflow or saturation. * * **Description**:\n * This instruction adds the 32-bit unsigned integer element in [63:32] of Rs1 with the 32- * bit unsigned integer element in [31:0] of Rs2, and subtracts the 32-bit unsigned integer element in * [63:32] of Rs2 from the 32-bit unsigned integer element in [31:0] of Rs1. The element results are first * logically right-shifted by 1 bit and then written to [63:32] of Rd for addition and [31:0] of Rd for * subtraction. * * **Examples**:\n * ~~~ * Please see `URADD32` and `URSUB32` instructions. 
* ~~~ * * **Operations**:\n * ~~~ * Rd.W[1] = (Rs1.W[1] + Rs2.W[0]) u>> 1; * Rd.W[0] = (Rs1.W[0] - Rs2.W[1]) u>> 1; * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_URCRAS32(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("urcras32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.52. URCRAS32 ===== */ /* ===== Inline Function Start for 4.53. URCRSA32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB * \brief URCRSA32 (SIMD 32-bit Unsigned Halving Cross Subtraction & Addition) * \details * **Type**: SIMD (RV64 Only) * * **Syntax**:\n * ~~~ * URCRSA32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 32-bit unsigned integer element subtraction and 32-bit unsigned integer element * addition in a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements. The results * are halved to avoid overflow or saturation. * * **Description**:\n * This instruction subtracts the 32-bit unsigned integer element in [31:0] of Rs2 from the * 32-bit unsigned integer element in [63:32] of Rs1, and adds the 32-bit unsigned element integer in * [31:0] of Rs1 with the 32-bit unsigned integer element in [63:32] of Rs2. The two results are first * logically right-shifted by 1 bit and then written to [63:32] of Rd for subtraction and [31:0] of Rd for * addition. * * **Examples**:\n * ~~~ * Please see `URADD32` and `URSUB32` instructions. 
* ~~~ * * **Operations**:\n * ~~~ * Rd.W[1] = (Rs1.W[1] - Rs2.W[0]) u>> 1; * Rd.W[0] = (Rs1.W[0] + Rs2.W[1]) u>> 1; * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_URCRSA32(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("urcrsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.53. URCRSA32 ===== */ /* ===== Inline Function Start for 4.54. URSTAS32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB * \brief URSTAS32 (SIMD 32-bit Unsigned Halving Straight Addition & Subtraction) * \details * **Type**: SIMD (RV64 Only) * * **Syntax**:\n * ~~~ * URSTAS32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 32-bit unsigned integer element addition and 32-bit unsigned integer element * subtraction in a 64-bit chunk simultaneously. Operands are from corresponding 32-bit elements. * The results are halved to avoid overflow or saturation. * * **Description**:\n * This instruction adds the 32-bit unsigned integer element in [63:32] of Rs1 with the 32- * bit unsigned integer element in [63:32] of Rs2, and subtracts the 32-bit unsigned integer element in * [31:0] of Rs2 from the 32-bit unsigned integer element in [31:0] of Rs1. The element results are first * logically right-shifted by 1 bit and then written to [63:32] of Rd for addition and [31:0] of Rd for * subtraction. * * **Examples**:\n * ~~~ * Please see `URADD32` and `URSUB32` instructions. 
* ~~~ * * **Operations**:\n * ~~~ * Rd.W[1] = (Rs1.W[1] + Rs2.W[1]) u>> 1; * Rd.W[0] = (Rs1.W[0] - Rs2.W[0]) u>> 1; * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_URSTAS32(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("urstas32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.54. URSTAS32 ===== */ /* ===== Inline Function Start for 4.55. URSTSA32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB * \brief URSTSA32 (SIMD 32-bit Unsigned Halving Straight Subtraction & Addition) * \details * **Type**: SIMD (RV64 Only) * * **Syntax**:\n * ~~~ * URSTSA32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 32-bit unsigned integer element subtraction and 32-bit unsigned integer element * addition in a 64-bit chunk simultaneously. Operands are from corresponding 32-bit elements. The * results are halved to avoid overflow or saturation. * * **Description**:\n * This instruction subtracts the 32-bit unsigned integer element in [63:32] of Rs2 from * the 32-bit unsigned integer element in [63:32] of Rs1, and adds the 32-bit unsigned element integer * in [31:0] of Rs1 with the 32-bit unsigned integer element in [31:0] of Rs2. The two results are first * logically right-shifted by 1 bit and then written to [63:32] of Rd for subtraction and [31:0] of Rd for * addition. * * **Examples**:\n * ~~~ * Please see `URADD32` and `URSUB32` instructions. 
* ~~~ * * **Operations**:\n * ~~~ * Rd.W[1] = (Rs1.W[1] - Rs2.W[1]) u>> 1; * Rd.W[0] = (Rs1.W[0] + Rs2.W[0]) u>> 1; * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_URSTSA32(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("urstsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.55. URSTSA32 ===== */ /* ===== Inline Function Start for 4.56. URSUB32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB * \brief URSUB32 (SIMD 32-bit Unsigned Halving Subtraction) * \details * **Type**: SIMD (RV64 Only) * * **Syntax**:\n * ~~~ * URSUB32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do 32-bit unsigned integer element subtractions simultaneously. The results are halved to * avoid overflow or saturation. * * **Description**:\n * This instruction subtracts the 32-bit unsigned integer elements in Rs2 from the 32-bit * unsigned integer elements in Rs1. The results are first logically right-shifted by 1 bit and then * written to Rd. * * **Examples**:\n * ~~~ * * Ra = 0x7FFFFFFF, Rb = 0x80000000, Rt = 0xFFFFFFFF * * Ra = 0x80000000, Rb = 0x7FFFFFFF, Rt = 0x00000000 * * Ra = 0x80000000, Rb = 0x40000000, Rt = 0x20000000 * ~~~ * * **Operations**:\n * ~~~ * Rd.W[x] = (Rs1.W[x] - Rs2.W[x]) u>> 1; * for RV64: x=1...0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_URSUB32(unsigned long a, unsigned long b) { unsigned long result; __ASM volatile("ursub32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for 4.56. 
URSUB32 ===== */ #endif /* __RISCV_XLEN == 64 */ /** * \defgroup NMSIS_Core_DSP_Intrinsic_NUCLEI_Default Nuclei Default SIMD DSP Additional Instructions * \ingroup NMSIS_Core_DSP_Intrinsic * \brief (RV32 & RV64)Nuclei Customized DSP Instructions * \details This is Nuclei customized DSP instructions for both RV32 and RV64 */ /* ===== Inline Function Start for EXPD80 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_Default * \brief EXPD80 (Expand and Copy Byte 0 to 32bit(when rv32) or 64bit(when rv64)) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * EXPD80 Rd, Rs1 * ~~~ * * **Purpose**:\n * When rv32, Copy 8-bit data from 32-bit chunks into 4 bytes in a register. * When rv64, Copy 8-bit data from 64-bit chunks into 8 bytes in a register. * * **Description**:\n * Moves Rs1.B[0][7:0] to Rd.[0][7:0], Rd.[1][7:0], Rd.[2][7:0], Rd.[3][7:0] * * **Operations**:\n * ~~~ * Rd.W[x][31:0] = CONCAT(Rs1.B[0][7:0], Rs1.B[0][7:0], Rs1.B[0][7:0], Rs1.B[0][7:0]); * for RV32: x=0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_EXPD80(unsigned long a) { unsigned long result; __ASM volatile("expd80 %0, %1" : "=r"(result) : "r"(a)); return result; } /* ===== Inline Function End for EXPD80 ===== */ /* ===== Inline Function Start for EXPD81 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_Default * \brief EXPD81 (Expand and Copy Byte 1 to 32bit(rv32) or 64bit(when rv64)) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * EXPD81 Rd, Rs1 * ~~~ * * **Purpose**:\n * Copy 8-bit data from 32-bit chunks into 4 bytes in a register. 
* * **Description**:\n * Moves Rs1.B[1][7:0] to Rd.[0][7:0], Rd.[1][7:0], Rd.[2][7:0], Rd.[3][7:0] * * **Operations**:\n * ~~~ * Rd.W[x][31:0] = CONCAT(Rs1.B[1][7:0], Rs1.B[1][7:0], Rs1.B[1][7:0], Rs1.B[1][7:0]); * for RV32: x=0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_EXPD81(unsigned long a) { unsigned long result; __ASM volatile("expd81 %0, %1" : "=r"(result) : "r"(a)); return result; } /* ===== Inline Function End for EXPD81 ===== */ /* ===== Inline Function Start for EXPD82 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_Default * \brief EXPD82 (Expand and Copy Byte 2 to 32bit(rv32) or 64bit(when rv64)) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * EXPD82 Rd, Rs1 * ~~~ * * **Purpose**:\n * Copy 8-bit data from 32-bit chunks into 4 bytes in a register. * * **Description**:\n * Moves Rs1.B[2][7:0] to Rd.[0][7:0], Rd.[1][7:0], Rd.[2][7:0], Rd.[3][7:0] * * **Operations**:\n * ~~~ * Rd.W[x][31:0] = CONCAT(Rs1.B[2][7:0], Rs1.B[2][7:0], Rs1.B[2][7:0], Rs1.B[2][7:0]); * for RV32: x=0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_EXPD82(unsigned long a) { unsigned long result; __ASM volatile("expd82 %0, %1" : "=r"(result) : "r"(a)); return result; } /* ===== Inline Function End for EXPD82 ===== */ /* ===== Inline Function Start for EXPD83 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_Default * \brief EXPD83 (Expand and Copy Byte 3 to 32bit(rv32) or 64bit(when rv64)) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * EXPD83 Rd, Rs1 * ~~~ * * **Purpose**:\n * Copy 8-bit data from 32-bit chunks into 4 bytes in a register. 
* * **Description**:\n * Moves Rs1.B[3][7:0] to Rd.[0][7:0], Rd.[1][7:0], Rd.[2][7:0], Rd.[3][7:0] * * **Operations**:\n * ~~~ * Rd.W[x][31:0] = CONCAT(Rs1.B[3][7:0], Rs1.B[3][7:0], Rs1.B[3][7:0], Rs1.B[3][7:0]); * for RV32: x=0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_EXPD83(unsigned long a) { unsigned long result; __ASM volatile("expd83 %0, %1" : "=r"(result) : "r"(a)); return result; } /* ===== Inline Function End for EXPD83 ===== */ #if (__RISCV_XLEN == 64) /* ===== Inline Function Start for EXPD84 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_Default * \brief EXPD84 (Expand and Copy Byte 4 to 64bit) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * EXPD84 Rd, Rs1 * ~~~ * * **Purpose**:\n * Only RV64, copy 8-bit data from 64-bit chunks into 8 bytes in a register. * * **Description**:\n * Moves Rs1.B[4][7:0] to Rd.[0][7:0], Rd.[1][7:0], Rd.[2][7:0], Rd.[3][7:0] * * **Operations**:\n * ~~~ * Rd.W[x][31:0] = CONCAT(Rs1.B[4][7:0], Rs1.B[4][7:0], Rs1.B[4][7:0], Rs1.B[4][7:0]); * for RV32: x=0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_EXPD84(unsigned long a) { unsigned long result; __ASM volatile("expd84 %0, %1" : "=r"(result) : "r"(a)); return result; } /* ===== Inline Function End for EXPD84 ===== */ /* ===== Inline Function Start for EXPD85 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_Default * \brief EXPD85 (Expand and Copy Byte 5 to 64bit) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * EXPD85 Rd, Rs1 * ~~~ * * **Purpose**:\n * Only RV64, copy 8-bit data from 64-bit chunks into 8 bytes in a register. 
* * **Description**:\n * Moves Rs1.B[5][7:0] to Rd.[0][7:0], Rd.[1][7:0], Rd.[2][7:0], Rd.[3][7:0] * * **Operations**:\n * ~~~ * Rd.W[x][31:0] = CONCAT(Rs1.B[5][7:0], Rs1.B[5][7:0], Rs1.B[5][7:0], Rs1.B[5][7:0]); * for RV32: x=0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_EXPD85(unsigned long a) { unsigned long result; __ASM volatile("expd85 %0, %1" : "=r"(result) : "r"(a)); return result; } /* ===== Inline Function End for EXPD85 ===== */ /* ===== Inline Function Start for EXPD86 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_Default * \brief EXPD86 (Expand and Copy Byte 6 to 64bit) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * EXPD86 Rd, Rs1 * ~~~ * * **Purpose**:\n * Only RV64, copy 8-bit data from 64-bit chunks into 8 bytes in a register. * * **Description**:\n * Moves Rs1.B[6][7:0] to Rd.[0][7:0], Rd.[1][7:0], Rd.[2][7:0], Rd.[3][7:0] * * **Operations**:\n * ~~~ * Rd.W[x][31:0] = CONCAT(Rs1.B[6][7:0], Rs1.B[6][7:0], Rs1.B[6][7:0], Rs1.B[6][7:0]); * for RV32: x=0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_EXPD86(unsigned long a) { unsigned long result; __ASM volatile("expd86 %0, %1" : "=r"(result) : "r"(a)); return result; } /* ===== Inline Function End for EXPD86 ===== */ /* ===== Inline Function Start for EXPD87 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_Default * \brief EXPD87 (Expand and Copy Byte 7 to 64bit) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * EXPD87 Rd, Rs1 * ~~~ * * **Purpose**:\n * Only RV64, copy 8-bit data from 64-bit chunks into 8 bytes in a register. 
* * **Description**:\n * Moves Rs1.B[7][7:0] to Rd.[0][7:0], Rd.[1][7:0], Rd.[2][7:0], Rd.[3][7:0] * * **Operations**:\n * ~~~ * Rd.W[x][31:0] = CONCAT(Rs1.B[7][7:0], Rs1.B[7][7:0], Rs1.B[7][7:0], Rs1.B[7][7:0]); * for RV32: x=0 * ~~~ * * \param [in] a unsigned long type of value stored in a * \return value stored in unsigned long type */ __STATIC_FORCEINLINE unsigned long __RV_EXPD87(unsigned long a) { unsigned long result; __ASM volatile("expd87 %0, %1" : "=r"(result) : "r"(a)); return result; } /* ===== Inline Function End for EXPD87 ===== */ #endif /* __RISCV_XLEN == 64 */ #if (__RISCV_XLEN == 32) || defined(__ONLY_FOR_DOXYGEN_DOCUMENT_GENERATION__) /* XXXXX Nuclei Extended DSP Instructions for RV32 XXXXX */ /** * \defgroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N1 Nuclei N1 SIMD DSP Additional Instructions * \ingroup NMSIS_Core_DSP_Intrinsic * \brief (RV32 only)Nuclei Customized N1 DSP Instructions * \details This is Nuclei customized DSP N1 instructions only for RV32 */ /** * \defgroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 Nuclei N2 SIMD DSP Additional Instructions * \ingroup NMSIS_Core_DSP_Intrinsic * \brief (RV32 only)Nuclei Customized N2 DSP Instructions * \details This is Nuclei customized DSP N2 instructions only for RV32 */ /** * \defgroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 Nuclei N3 SIMD DSP Additional Instructions * \ingroup NMSIS_Core_DSP_Intrinsic * \brief (RV32 only)Nuclei Customized N3 DSP Instructions * \details This is Nuclei customized DSP N3 instructions only for RV32 */ /* ===== Inline Function Start for DKHM8 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N1 * \brief DKHM8 (64-bit SIMD Signed Saturating Q7 Multiply) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DKHM8 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do Q7xQ7 element multiplications simultaneously. The Q14 results are then reduced to Q7 * numbers again. 
* * **Description**:\n * For the `DKHM8` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1 * with the top 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7 * content of 16-bit chunks in Rs1 with the bottom 8-bit Q7 content of 16-bit chunks in Rs2. * * The Q14 results are then right-shifted 7-bits and saturated into Q7 values. The Q7 results are then * written into Rd. When both the two Q7 inputs of a multiplication are 0x80, saturation will happen. * The result will be saturated to 0x7F and the overflow flag OV will be set. * * **Operations**:\n * ~~~ * op1t = Rs1.B[x+1]; op2t = Rs2.B[x+1]; // top * op1b = Rs1.B[x]; op2b = Rs2.B[x]; // bottom * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) { * if (0x80 != aop | 0x80 != bop) { * res = (aop s* bop) >> 7; * } else { * res= 0x7F; * OV = 1; * } * } * Rd.H[x/2] = concat(rest, resb); * for RV32, x=0,2,4,6 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DKHM8(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dkhm8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DKHM8 ===== */ /* ===== Inline Function Start for DKHM16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N1 * \brief DKHM16 (64-bit SIMD Signed Saturating Q15 Multiply) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DKHM16 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do Q15xQ15 element multiplications simultaneously. The Q30 results are then reduced to * Q15 numbers again. * * **Description**:\n * For the `DKHM16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in * Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2. 
At the same time, multiply the bottom * 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content of 32-bit chunks in * Rs2. * * The Q30 results are then right-shifted 15-bits and saturated into Q15 values. The Q15 results are * then written into Rd. When both the two Q15 inputs of a multiplication are 0x8000, saturation will * happen. The result will be saturated to 0x7FFF and the overflow flag OV will be set. * * **Operations**:\n * ~~~ * op1t = Rs1.H[x+1]; op2t = Rs2.H[x+1]; // top * op1b = Rs1.H[x]; op2b = Rs2.H[x]; // bottom * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) { * if (0x8000 != aop | 0x8000 != bop) { * res = (aop s* bop) >> 15; * } else { * res= 0x7FFF; * OV = 1; * } * } * Rd.W[x/2] = concat(rest, resb); * for RV32: x=0, 2 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DKHM16(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dkhm16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DKHM16 ===== */ /* ===== Inline Function Start for DKABS8 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N1 * \brief DKABS8 (64-bit SIMD 8-bit Saturating Absolute) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DKABS8 Rd, Rs1 * # Rd, Rs1 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Get the absolute value of 8-bit signed integer elements simultaneously. * * **Description**:\n * This instruction calculates the absolute value of 8-bit signed integer elements stored * in Rs1 and writes the element results to Rd. If the input number is 0x80, this instruction generates * 0x7f as the output and sets the OV bit to 1. 
* * **Operations**:\n * ~~~ * src = Rs1.B[x]; * if (src == 0x80) { * src = 0x7f; * OV = 1; * } else if (src[7] == 1) * src = -src; * } * Rd.B[x] = src; * for RV32: x=7...0, * ~~~ * * \param [in] a unsigned long long type of value stored in a * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DKABS8(unsigned long long a) { unsigned long long result; __ASM volatile("dkabs8 %0, %1" : "=r"(result) : "r"(a)); return result; } /* ===== Inline Function End for DKABS8 ===== */ /* ===== Inline Function Start for DKABS16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N1 * \brief DKABS16 (64-bit SIMD 16-bit Saturating Absolute) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DKABS16 Rd, Rs1 * # Rd, Rs1 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Get the absolute value of 16-bit signed integer elements simultaneously. * * **Description**:\n * This instruction calculates the absolute value of 16-bit signed integer elements stored * in Rs1 and writes the element results to Rd. If the input number is 0x8000, this instruction * generates 0x7fff as the output and sets the OV bit to 1. 
* * **Operations**:\n * ~~~ * src = Rs1.H[x]; * if (src == 0x8000) { * src = 0x7fff; * OV = 1; * } else if (src[15] == 1) * src = -src; * } * Rd.H[x] = src; * for RV32: x=3...0, * ~~~ * * \param [in] a unsigned long long type of value stored in a * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DKABS16(unsigned long long a) { unsigned long long result; __ASM volatile("dkabs16 %0, %1" : "=r"(result) : "r"(a)); return result; } /* ===== Inline Function End for DKABS16 ===== */ /* ===== Inline Function Start for DKSLRA8 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N1 * \brief DKSLRA8 (64-bit SIMD 8-bit Shift Left Logical with Saturation or Shift Right Arithmetic) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DKSLRA8 Rd, Rs1, Rs2 * # Rd, Rs1 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do 8-bit elements logical left (positive) or arithmetic right (negative) shift operation with * Q7 saturation for the left shift. * * **Description**:\n * The 8-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically * based on the value of Rs2[3:0]. Rs2[3:0] is in the signed range of [-2^3, 2^3-1]. A positive Rs2[3:0] means * logical left shift and a negative Rs2[3:0] means arithmetic right shift. The shift amount is the * absolute value of Rs2[3:0]. However, the behavior of `Rs2[3:0]==-2^3 (0x8)` is defined to be * equivalent to the behavior of `Rs2[3:0]==-(2^3-1) (0x9)`. * The left-shifted results are saturated to the 8-bit signed integer range of [-2^7, 2^7-1]. * If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:4] will not affect * this instruction. * * **Operations**:\n * ~~~ * if (Rs2[3:0] < 0) { * sa = -Rs2[3:0]; * sa = (sa == 8)? 
7 : sa; * Rd.B[x] = SE8(Rs1.B[x][7:sa]); * } else { * sa = Rs2[2:0]; * res[(7+sa):0] = Rs1.B[x] <<(logic) sa; * if (res > (2^7)-1) { * res[7:0] = 0x7f; OV = 1; * } else if (res < -2^7) { * res[7:0] = 0x80; OV = 1; * } * Rd.B[x] = res[7:0]; * } * for RV32: x=7...0, * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b int type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DKSLRA8(unsigned long long a, int b) { unsigned long long result; __ASM volatile("dkslra8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DKSLRA8 ===== */ /* ===== Inline Function Start for DKSLRA16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N1 * \brief DKSLRA16 (64-bit SIMD 16-bit Shift Left Logical with Saturation or Shift Right Arithmetic) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DKSLRA16 Rd, Rs1, Rs2 * # Rd, Rs1 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do 16-bit elements logical left (positive) or arithmetic right (negative) shift operation with * Q15 saturation for the left shift. * * **Description**:\n * The 16-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically * based on the value of Rs2[4:0]. Rs2[4:0] is in the signed range of [-2^4, 2^4-1]. A positive Rs2[4:0] means * logical left shift and a negative Rs2[4:0] means arithmetic right shift. The shift amount is the * absolute value of Rs2[4:0]. However, the behavior of `Rs2[4:0]==-2^4 (0x10)` is defined to be * equivalent to the behavior of `Rs2[4:0]==-(2^4-1) (0x11)`. * The left-shifted results are saturated to the 16-bit signed integer range of [-2^15, 2^15-1]. * After the shift, saturation, or rounding, the final results are written to * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:5] will not affect * this instruction. 
* * **Operations**:\n * ~~~ * if (Rs2[4:0] < 0) { * sa = -Rs2[4:0]; * sa = (sa == 16)? 15 : sa; * Rd.H[x] = SE16(Rs1.H[x][15:sa]); * } else { * sa = Rs2[3:0]; * res[(15+sa):0] = Rs1.H[x] <<(logic) sa; * if (res > (2^15)-1) { * res[15:0] = 0x7fff; OV = 1; * } else if (res < -2^15) { * res[15:0] = 0x8000; OV = 1; * } * d.H[x] = res[15:0]; * } * for RV32: x=3...0, * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b int type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DKSLRA16(unsigned long long a, int b) { unsigned long long result; __ASM volatile("dkslra16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DKSLRA16 ===== */ /* ===== Inline Function Start for DKADD8 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N1 * \brief DKADD8 (64-bit SIMD 8-bit Signed Saturating Addition) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DKADD8 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do 8-bit signed integer element saturating additions simultaneously. * * **Description**:\n * This instruction adds the 8-bit signed integer elements in Rs1 with the 8-bit signed * integer elements in Rs2. If any of the results are beyond the Q7 number range (-2^7 <= Q7 <= 2^7-1), they * are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd. 
* * **Operations**:\n * ~~~ * res[x] = Rs1.B[x] + Rs2.B[x]; * if (res[x] > 127) { * res[x] = 127; * OV = 1; * } else if (res[x] < -128) { * res[x] = -128; * OV = 1; * } * Rd.B[x] = res[x]; * for RV32: x=7...0, * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DKADD8(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dkadd8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DKADD8 ===== */ /* ===== Inline Function Start for DKADD16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N1 * \brief DKADD16 (64-bit SIMD 16-bit Signed Saturating Addition) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DKADD16 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do 16-bit signed integer element saturating additions simultaneously. * * **Description**:\n * This instruction adds the 16-bit signed integer elements in Rs1 with the 16-bit signed * integer elements in Rs2. If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= 2^15-1), * they are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd. 
* * **Operations**:\n * ~~~ * res[x] = Rs1.H[x] + Rs2.H[x]; * if (res[x] > 32767) { * res[x] = 32767; * OV = 1; * } else if (res[x] < -32768) { * res[x] = -32768; * OV = 1; * } * Rd.H[x] = res[x]; * for RV32: x=3...0, * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DKADD16(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dkadd16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DKADD16 ===== */ /* ===== Inline Function Start for DKSUB8 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N1 * \brief DKSUB8 (64-bit SIMD 8-bit Signed Saturating Subtraction) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DKSUB8 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do 8-bit signed elements saturating subtractions simultaneously. * * **Description**:\n * This instruction subtracts the 8-bit signed integer elements in Rs2 from the 8-bit * signed integer elements in Rs1. If any of the results are beyond the Q7 number range (-2^7 <= Q7 <= 2^7-1), * they are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd. 
* * **Operations**:\n * ~~~ * res[x] = Rs1.B[x] - Rs2.B[x]; * if (res[x] > (2^7)-1) { * res[x] = (2^7)-1; * OV = 1; * } else if (res[x] < -2^7) { * res[x] = -2^7; * OV = 1; * } * Rd.B[x] = res[x]; * for RV32: x=7...0, * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DKSUB8(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dksub8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DKSUB8 ===== */ /* ===== Inline Function Start for DKSUB16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N1 * \brief DKSUB16 (64-bit SIMD 16-bit Signed Saturating Subtraction) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DKSUB16 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do 16-bit signed integer elements saturating subtractions simultaneously. * * **Description**:\n * This instruction subtracts the 16-bit signed integer elements in Rs2 from the 16-bit * signed integer elements in Rs1. If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= * 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to * Rd. 
* * **Operations**:\n * ~~~ * res[x] = Rs1.H[x] - Rs2.H[x]; * if (res[x] > (2^15)-1) { * res[x] = (2^15)-1; * OV = 1; * } else if (res[x] < -2^15) { * res[x] = -2^15; * OV = 1; * } * Rd.H[x] = res[x]; * for RV32: x=3...0, * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DKSUB16(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dksub16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DKSUB16 ===== */ /* ===== Inline Function Start for DKHMX8 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DKHMX8 (64-bit SIMD Signed Crossed Saturating Q7 Multiply) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DKHMX8 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do Q7xQ7 element crossed multiplications simultaneously. The Q15 results are then reduced to Q7 numbers again. * * **Description**:\n * For the `KHM8` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1 with the * bottom 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7 * content of 16-bit chunks in Rs1 with the top 8-bit Q7 content of 16-bit chunks in Rs2. * * The Q14 results are then right-shifted 7-bits and saturated into Q7 values. The Q7 results are then * written into Rd. When both the two Q7 inputs of a multiplication are 0x80, saturation will happen. * The result will be saturated to 0x7F and the overflow flag OV will be set. 
* * **Operations**:\n * ~~~ * op1t = Rs1.B[x+1]; op2t = Rs2.B[x]; // top * op1b = Rs1.B[x]; op2b = Rs2.B[x+1]; // bottom * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) { * if (0x80 != aop | 0x80 != bop) { * res = (aop s* bop) >> 7; * } else { * res= 0x7F; * OV = 1; * } * } * Rd.H[x/2] = concat(rest, resb); * for RV32, x=0,2,4,6 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DKHMX8(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dkhmx8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DKHMX8 ===== */ /* ===== Inline Function Start for DKHMX16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DKHMX16 (64-bit SIMD Signed Crossed Saturating Q15 Multiply) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DKHMX16 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do Q15xQ15 element crossed multiplications simultaneously. The Q31 results are then reduced to Q15 numbers again. * * **Description**:\n * For the `KHMX16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the * bottom 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom 16-bit Q15 * content of 32-bit chunks in Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2. * * The Q30 results are then right-shifted 15-bits and saturated into Q15 values. The Q15 results are * then written into Rd. When both the two Q15 inputs of a multiplication are 0x8000, saturation will * happen. The result will be saturated to 0x7FFF and the overflow flag OV will be set. 
* * **Operations**:\n * ~~~ * op1t = Rs1.H[x+1]; op2t = Rs2.H[x]; // top * op1b = Rs1.H[x]; op2b = Rs2.H[x+1]; // bottom * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) { * if (0x8000 != aop | 0x8000 != bop) { * res = (aop s* bop) >> 15; * } else { * res= 0x7FFF; * OV = 1; * } * } * Rd.W[x/2] = concat(rest, resb); * for RV32, x=0,2 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DKHMX16(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dkhmx16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DKHMX16 ===== */ /* ===== Inline Function Start for DSMMUL ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DSMMUL (64-bit MSW 32x32 Signed Multiply) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DSMMUL Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do MSW 32x32 element signed multiplications simultaneously. The results are written into Rd. * * **Description**:\n * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2 and writes the * most significant 32-bit multiplication results to the corresponding 32-bit elements of Rd. The 32-bit * elements of Rs1 and Rs2 are treated as signed integers. The .u form of the instruction rounds up * the most significant 32-bit of the 64-bit multiplication results by adding a 1 to bit 31 of the results. 
* * **Operations**:\n * ~~~ * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) { * res = (aop s* bop)[63:32]; * } * Rd = concat(rest, resb); * x=0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DSMMUL(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dsmmul %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DSMMUL ===== */ /* ===== Inline Function Start for DSMMUL.u ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DSMMUL.u (64-bit MSW 32x32 Unsigned Multiply) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DSMMUL.u Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do MSW 32x32 element unsigned multiplications simultaneously. The results are written into Rd. * * **Description**:\n * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2 and writes the * most significant 32-bit multiplication results to the corresponding 32-bit elements of Rd. The 32-bit * elements of Rs1 and Rs2 are treated as unsigned integers. The .u form of the instruction rounds up * the most significant 32-bit of the 64-bit multiplication results by adding a 1 to bit 31 of the results. 
* * **Operations**:\n * ~~~ * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) { * res = RUND(aop u* bop)[63:32]; * } * Rd = concat(rest, resb); * x=0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DSMMUL_U(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dsmmul.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DSMMUL.u ===== */ /* ===== Inline Function Start for DKWMMUL ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DKWMMUL (64-bit MSW 32x32 Signed Multiply & Double) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DKWMMUL Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do MSW 32x32 element signed multiplications simultaneously and double. The results are written into Rd. * * **Description**:\n * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2. It then shifts * the multiplication results one bit to the left and takes the most significant 32-bit results. If the * shifted result is greater than 2^31-1, it is saturated to 2^31-1 and the OV flag is set to 1. The final element * result is written to Rd. The 32-bit elements of Rs1 and Rs2 are treated as signed integers. The .u * form of the instruction additionally rounds up the 64-bit multiplication results by adding a 1 to bit * 30 before the shift and saturation operations. 
* * **Operations**:\n * ~~~ * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) { * res = sat.q31((aop s* bop) << 1)[63:32]; * } * Rd = concat(rest, resb); * x=0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DKWMMUL(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dkwmmul %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DKWMMUL ===== */ /* ===== Inline Function Start for DKWMMUL.u ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DKWMMUL.u (64-bit MSW 32x32 Unsigned Multiply & Double) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DKWMMUL.u Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do MSW 32x32 element unsigned multiplications simultaneously and double. The results are written into Rd. * * **Description**:\n * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2. It then shifts * the multiplication results one bit to the left and takes the most significant 32-bit results. If the * shifted result is greater than 2^31-1, it is saturated to 2^31-1 and the OV flag is set to 1. The final element * result is written to Rd. The 32-bit elements of Rs1 and Rs2 are treated as signed integers. The .u * form of the instruction additionally rounds up the 64-bit multiplication results by adding a 1 to bit * 30 before the shift and saturation operations. 
* * **Operations**:\n * ~~~ * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) { * res = sat.q31(RUND(aop u* bop) << 1)[63:32]; * } * Rd = concat(rest, resb); * x=0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DKWMMUL_U(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dkwmmul.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DKWMMUL.u ===== */ /* ===== Inline Function Start for DKABS32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DKABS32 (64-bit SIMD 32-bit Saturating Absolute) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DKABS32 Rd, Rs1 * # Rd, Rs1 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Get the absolute value of 32-bit signed integer elements simultaneously. * * **Description**:\n * This instruction calculates the absolute value of 32-bit signed integer elements stored in Rs1 and writes the element * results to Rd. If the input number is 0x8000_0000, this instruction generates 0x7fff_ffff as the output and sets the OV * bit to 1. 
* * **Operations**:\n * ~~~ * src = Rs1.W[x]; * if (src == 0x8000_0000) { * src = 0x7fff_ffff; * OV = 1; * } else if (src[31] == 1) * src = -src; * } * Rd.W[x] = src; * x=1...0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DKABS32(unsigned long long a) { unsigned long long result; __ASM volatile("dkabs32 %0, %1" : "=r"(result) : "r"(a)); return result; } /* ===== Inline Function End for DKABS32 ===== */ /* ===== Inline Function Start for DKSLRA32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DKSLRA32 (64-bit SIMD 32-bit Shift Left Logical with Saturation or Shift Right Arithmetic) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DKSLRA32 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do 31-bit elements logical left (positive) or arithmetic right (negative) shift operation with Q31 saturation for the left shift. * * **Description**:\n * The 31-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically based on the value of Rs2[5:0]. * Rs2[5:0] is in the signed range of [-2^5, 2^5-1]. A positive Rs2[5:0] means logical left shift and a negative Rs2[4:0] * means arithmetic right shift. The shift amount is the absolute value of Rs2[5:0]. However, the behavior of Rs2[5:0]==- * 2^5 (0x20) is defined to be equivalent to the behavior of Rs2[5:0]==-(2^5-1) (0x21). * * **Operations**:\n * ~~~ * if (Rs2[5:0] < 0) { * sa = -Rs2[5:0]; * sa = (sa == 32)? 
31 : sa; * Rd.W[x] = SE32(Rs1.W[x][31:sa]); * } else { * sa = Rs2[4:0]; * res[(31+sa):0] = Rs1.W[x] <<(logic) sa; * if (res > (2^31)-1) { * res[31:0] = 0x7fff_ffff; OV = 1; * } else if (res < -2^31) { * res[31:0] = 0x8000_0000; OV = 1; * } * Rd.W[x] = res[31:0]; * } * x=1...0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b int type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DKSLRA32(unsigned long long a, int b) { unsigned long long result; __ASM volatile("dkslra32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DKSLRA32 ===== */ /* ===== Inline Function Start for DKADD32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DKADD32(64-bit SIMD 32-bit Signed Saturating Addition) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DKADD32 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do 32-bit signed integer element saturating additions simultaneously. * * **Description**:\n * This instruction adds the 32-bit signed integer elements in Rs1 with the 32-bit signed integer elements in Rs2. If any * of the results are beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), they are saturated to the range and the OV * bit is set to 1. The saturated results are written to Rd. 
* * **Operations**:\n * ~~~ * res[x] = Rs1.W[x] + Rs2.W[x]; * if (res[x] > 0x7fff_ffff) { * res[x] = 0x7fff_ffff; * OV = 1; * } else if (res[x] < 0x8000_0000) { * res[x] = 0x8000_0000; * OV = 1; * } * Rd.W[x] = res[x]; * x=1...0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DKADD32(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dkadd32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DKADD32 ===== */ /* ===== Inline Function Start for DKSUB32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DKSUB32 (64-bit SIMD 32-bit Signed Saturating Subtraction) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DKSUB32 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do 32-bit signed integer element saturating subtractions simultaneously. * * **Description**:\n * This instruction subtracts the 32-bit signed integer elements in Rs2 from the 32-bit signed integer elements in Rs1. If * any of the results are beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), they are saturated to the range and the * OV bit is set to 1. The saturated results are written to Rd. 
* * **Operations**:\n * ~~~ * res[x] = Rs1.W[x] - Rs2.W[x]; * if (res[x] > (2^31)-1) { * res[x] = (2^31)-1; * OV = 1; * } else if (res[x] < -2^31) { * res[x] = -2^31; * OV = 1; * } * Rd.W[x] = res[x]; * x=1...0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DKSUB32(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dksub32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DKSUB32 ===== */ /* ===== Inline Function Start for DRADD16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DRADD16 (64-bit SIMD 16-bit Halving Signed Addition) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DRADD16 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do 16-bit signed integer element additions simultaneously. The results are halved to avoid overflow or saturation. * * **Description**:\n * This instruction adds the 16-bit signed integer elements in Rs1 with the 16-bit signed integer elements in Rs2. The results * are first arithmetically right-shifted by 1 bit and then written to Rd. 
* * **Operations**:\n * ~~~ * Rd.H[x] = [(Rs1.H[x]) + (Rs2.H[x])] s>> 1; * x=3...0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DRADD16(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dradd16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DRADD16 ===== */ /* ===== Inline Function Start for DSUB16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DSUB16 (64-bit SIMD 16-bit Halving Signed Subtraction) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DSUB16 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do 16-bit integer element subtractions simultaneously. * * **Description**:\n * This instruction adds the 16-bit signed integer elements in Rs1 with the 16-bit signed integer elements in Rs2. The results * are first arithmetically right-shifted by 1 bit and then written to Rd. 
* * **Operations**:\n * ~~~ * Rd.H[x] = [(Rs1.H[x]) - (Rs2.H[x])] ; * x=3...0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DSUB16(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dsub16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DSUB16 ===== */ /* ===== Inline Function Start for DRADD32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DRADD32 (64-bit SIMD 32-bit Halving Signed Addition) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DRADD32 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do 32-bit signed integer element additions simultaneously. The results are halved to avoid overflow or saturation. * * **Description**:\n * This instruction adds the 32-bit signed integer elements in Rs1 with the 32-bit signed integer elements in Rs2. The results * are first arithmetically right-shifted by 1 bit and then written to Rd. 
* * **Operations**:\n * ~~~ * Rd.W[x] = [(Rs1.W[x]) + (Rs2.W[x])] s>> 1; * x=1...0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DRADD32(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dradd32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DRADD32 ===== */ /* ===== Inline Function Start for DSUB32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DSUB32 (64-bit SIMD 32-bit Halving Signed Subtraction) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DSUB32 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do 32-bit integer element subtractions simultaneously. * * **Description**:\n * This instruction subtracts the 32-bit signed integer elements in Rs2 from the 32-bit signed integer elements in Rs1 . The * results are written to Rd. 
* * **Operations**:\n * ~~~ * Rd.W[x] = [(Rs1.E[x]) - (Rs2.E[x])] ; * x=1...0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DSUB32(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dsub32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DSUB32 ===== */ /* ===== Inline Function Start for DMSR16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DMSR16 (Signed Multiply Halfs with Right Shift 16-bit and Cross Multiply Halfs with Right Shift 16-bit) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DMSR16 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do two signed 16-bit multiplications and cross multiplications from the 16-bit elements of two registers; and each multiplications performs a right shift operation. * * **Description**:\n * For the `DMSR16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the top 16-bit Q15 content * of 32-bit chunks in Rs2, multiply the bottom 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content * of 32-bit chunks in Rs2. * At the same time, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom16-bit Q15 content of 32-bit * chunks in Rs2 and multiply the bottom16-bit Q15 content of 32-bit chunks in Rs1 with the top16-bit Q15 content of 32-bit * chunks in Rs2. The Q31 results are then right-shifted 16-bits and clipped to Q15 values. The Q15 results are then written * into Rd. 
* * **Operations**:\n * ~~~ * Rd.H[0] = (Rs1.H[0] s* Rs2.H[0]) s>> 16 * Rd.H[1] = (Rs1.H[1] s* Rs2.H[1]) s>> 16 * Rd.H[2] = (Rs1.H[1] s* Rs2.H[0]) s>> 16 * Rd.H[3] = (Rs1.H[0] s* Rs2.H[1]) s>> 16 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DMSR16(unsigned long a, unsigned long b) { unsigned long long result; __ASM volatile("dmsr16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DMSR16 ===== */ /* ===== Inline Function Start for DMSR17 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DMSR17 (Signed Multiply Halfs with Right Shift 17-bit and Cross Multiply Halfs with Right Shift 17-bit) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DMSR17 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do two signed 16-bit multiplications and cross multiplications from the 16-bit elements of two registers; * and each multiplications performs a right shift operation. * * **Description**:\n * For the `DMSR17` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the top 16-bit Q15 content * of 32-bit chunks in Rs2, multiply the bottom 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content * of 32-bit chunks in Rs2. * At the same time, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content of 32-bit * chunks in Rs2 and multiply the bottom 16-bit Q15 content of 32-bit chunks in Rs1 with the top 16-bit Q15 content of 32-bit * chunks in Rs2. The Q31 results are then right-shifted 17-bits and clipped to Q15 values. The Q15 results are then written * into Rd. 
* * **Operations**:\n * ~~~ * Rd.H[0] = (Rs1.H[0] s* Rs2.H[0]) s>> 17 * Rd.H[1] = (Rs1.H[1] s* Rs2.H[1]) s>> 17 * Rd.H[2] = (Rs1.H[1] s* Rs2.H[0]) s>> 17 * Rd.H[3] = (Rs1.H[0] s* Rs2.H[1]) s>> 17 * ~~~ * * \param [in] a unsigned long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DMSR17(unsigned long a, unsigned long b) { unsigned long long result; __ASM volatile("dmsr17 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DMSR17 ===== */ /* ===== Inline Function Start for DMSR33 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DMSR33 (Signed Multiply with Right Shift 33-bit and Cross Multiply with Right Shift 33-bit) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DMSR33 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do two signed 32-bit multiplications from the 32-bit elements of two registers, and each multiplications performs a right * shift operation. * * **Description**:\n * For the `DMSR33` instruction, multiply the top 32-bit Q31 content of 64-bit chunks in Rs1 with the top 32-bit Q31 content * of 64-bit chunks in Rs2. At the same time, multiply the bottom 32-bit Q31 content of 64bit chunks in Rs1 with the bottom * 32-bit Q31 content of 64-bit. * The Q64 results are then right-shifted 33-bits and clipped to Q31 values. The Q31 results are then written into Rd. 
* * **Operations**:\n * ~~~ * Rd.W[0] = (Rs1.W[0] s* Rs2.W[0]) s>> 33 * Rd.W[1] = (Rs1.W[1] s* Rs2.W[1]) s>> 33 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DMSR33(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dmsr33 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DMSR33 ===== */ /* ===== Inline Function Start for DMXSR33 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DMXSR33 (Signed Multiply with Right Shift 33-bit and Cross Multiply with Right Shift 33-bit) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DMXSR33 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do two signed 32-bit cross multiplications from the 32-bit elements of two registers, and each multiplications performs a * right shift operation. * * **Description**:\n * For the `DMXSR33` instruction, multiply the top 32-bit Q31 content of 64-bit chunks in Rs1 with the bottom 32-bit Q31 * content of 64-bit chunks in Rs2. At the same time, multiply the bottom 32-bit Q31 content of 64-bit chunks in Rs1 with * the top 32-bit Q31 content of 64-bit chunks in Rs2. * The Q63 results are then right-shifted 33-bits and clipped to Q31 values. The Q31 results are then written into Rd. 
*
 * **Operations**:\n
 * ~~~
 * Rd.W[0] = (Rs1.W[0] s* Rs2.W[1]) s>> 33
 * Rd.W[1] = (Rs1.W[1] s* Rs2.W[0]) s>> 33
 * ~~~
 *
 * \param [in] a  first source operand (Rs1), passed as unsigned long long
 * \param [in] b  second source operand (Rs2), passed as unsigned long long
 * \return content of Rd as unsigned long long
 */
__STATIC_FORCEINLINE unsigned long long __RV_DMXSR33(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;

    __ASM volatile("dmxsr33 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DMXSR33 ===== */

/* ===== Inline Function Start for DREDAS16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief DREDAS16 (Reduced Addition and Reduced Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DREDAS16 Rd, Rs1
 * # Rs1 is an even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do halfs reduced subtraction and halfs reduced addition from a register. The result is written to Rd.
 *
 * **Description**:\n
 * For the `DREDAS16` instruction, subtract the top 16-bit Q15 element from the bottom 16-bit Q15 element of the bottom
 * 32-bit Q31 content of 64-bit chunks in Rs1. At the same time, add the top 16-bit Q15 element with the bottom 16-bit
 * Q15 element of the top 32-bit Q31 content of 64-bit chunks in Rs1. The two Q15 results are then written into Rd.
*
 * **Operations**:\n
 * ~~~
 * Rd.H[0] = Rs1.H[0] - Rs1.H[1]
 * Rd.H[1] = Rs1.H[2] + Rs1.H[3]
 * ~~~
 *
 * \param [in] a  source operand (Rs1), passed as unsigned long long
 * \return content of Rd as unsigned long
 */
__STATIC_FORCEINLINE unsigned long __RV_DREDAS16(unsigned long long a)
{
    unsigned long rd;

    __ASM volatile("dredas16 %0, %1"
                   : "=r"(rd)
                   : "r"(a));
    return rd;
}
/* ===== Inline Function End for DREDAS16 ===== */

/* ===== Inline Function Start for DREDSA16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief DREDSA16 (Reduced Subtraction and Reduced Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DREDSA16 Rd, Rs1
 * # Rs1 is an even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do halfs reduced subtraction and halfs reduced addition from a register. The result is written to Rd.
 *
 * **Description**:\n
 * For the `DREDSA16` instruction, add the top 16-bit Q15 element to the bottom 16-bit Q15 element of the bottom
 * 32-bit Q31 content of 64-bit chunks in Rs1. At the same time, subtract the top 16-bit Q15 element from the bottom
 * 16-bit Q15 element of the top 32-bit Q31 content of 64-bit chunks in Rs1. The two Q15 results are then written into Rd.
*
 * **Operations**:\n
 * ~~~
 * Rd.H[0] = Rs1.H[0] + Rs1.H[1]
 * Rd.H[1] = Rs1.H[2] - Rs1.H[3]
 * ~~~
 *
 * \param [in] a  source operand (Rs1), passed as unsigned long long
 * \return content of Rd as unsigned long
 */
__STATIC_FORCEINLINE unsigned long __RV_DREDSA16(unsigned long long a)
{
    unsigned long rd;

    __ASM volatile("dredsa16 %0, %1"
                   : "=r"(rd)
                   : "r"(a));
    return rd;
}
/* ===== Inline Function End for DREDSA16 ===== */

/* ===== Inline Function Start for DKCLIP64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief DKCLIP64 (64-bit Clipped to 16-bit Saturation Value)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKCLIP64 Rd, Rs1
 * # Rs1 is an even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do a 15-bit arithmetic right shift and limit the result into a 32-bit int, then saturate it to 16-bit and
 * clip the result to 16-bit Q15.
 *
 * **Description**:\n
 * For the `DKCLIP64` instruction, shift the input 15 bits to the right and convert the result to 32-bit int type, after
 * which the value is saturated to limit the data to between 2^15-1 and -2^15. The result is converted to 16-bit Q15 type. The
 * final results are written to Rd.
* * **Operations**:\n * ~~~ * const int32_t max = (int32_t)((1U << 15U) - 1U); * const int32_t min = -1 - max ; * int32_t val = (int32_t)(Rs s>> 15); * if (val > max) { * Rd = max; * } else if (val < min) { * Rd = min; * } else { * Rd = (int16_t)val; * } * ~~~ * * \param [in] a unsigned long long type of value stored in a * \return value stored in int16_t type */ __STATIC_FORCEINLINE int16_t __RV_DKCLIP64(unsigned long long a) { int16_t result; __ASM volatile("dkclip64 %0, %1" : "=r"(result) : "r"(a)); return result; } /* ===== Inline Function End for DKCLIP64 ===== */ /* ===== Inline Function Start for DKMDA ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DKMDA (Signed Multiply Two Halfs and Add) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DKMDA Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then adds the two 32-bit results together. * The addition result may be saturated. * * **Description**:\n * This instruction multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of the * 32-bit elements of Rs2 and then adds the result to the result of multiplying the top 16-bit content of the 32-bit elements of * Rs1 with the top 16-bit content of the 32-bit elements of Rs2. * The addition result is checked for saturation. If saturation happens, the result is saturated to 2^31-1 The final results are * written to Rd. 
The 16-bit contents are treated as signed integers * * **Operations**:\n * ~~~ * if (Rs1.W[x] != 0x80008000) or (Rs2.W[x] != 0x80008000){ * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[1]) + (Rs1.W[x].H[0] * Rs2.W[x].H[0]); * } else { * Rd.W[x] = 0x7fffffff; * OV = 1; * } * x=1...0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DKMDA(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dkmda %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DKMDA ===== */ /* ===== Inline Function Start for DKMXDA ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DKMXDA (Signed Crossed Multiply Two Halfs and Add) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DKMXDA Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then adds the two 32-bit results together. * The addition result may be saturated. * * DKMXDA: top*bottom + top*bottom (per 32-bit element) * * **Description**:\n * This instruction multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit * elements of Rs2 and then adds the result to the result of multiplying the top 16-bit content of the 32-bit elements of Rs1 * with the bottom 16-bit content of the 32-bit elements of Rs2. * The addition result is checked for saturation.If saturation happens, the result is saturated to 2^31-1 The final results are * written to Rd. The 16-bit contents are treated as signed integers. 
* * **Operations**:\n * ~~~ * if (Rs1.W[x] != 0x80008000) or (Rs2.W[x] != 0x80008000){ * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) + (Rs1.W[x].H[0] * Rs2.W[x].H[1]); * } else { * Rd.W[x] = 0x7fffffff; * OV = 1; * } * x=1...0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DKMXDA(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dkmxda %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DKMXDA ===== */ /* ===== Inline Function Start for DSMDRS ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DSMDRS (Signed Multiply Two Halfs and Reverse Subtract) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DSMDRS Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then perform a subtraction operation * between the two 32-bit results. * * DSMDRS: bottom*bottom - top*top (per 32-bit element) * * **Description**:\n * This instruction multiplies the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit * elements of Rs2 and then subtracts the result from the result of multiplying the bottom 16-bit content of the 32-bit elements * of Rs1 with the bottom 16-bit content of the 32-bit elements of Rs2. * The subtraction result is written to the corresponding 32-bit element of Rd (The 16-bit contents of multiplication are * treated as signed integers). 
* * **Operations**:\n * ~~~ * Rd.W[x] = (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]); x = 1...0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DSMDRS(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dsmdrs %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DSMDRS ===== */ /* ===== Inline Function Start for DSMXDS ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DSMXDS (Signed Crossed Multiply Two Halfs and Subtract) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DSMXDS Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then perform a subtraction operation * between the two 32-bit results. * * DSMXDS: top*bottom - bottom*top (per 32-bit element) * * **Description**:\n * This instruction multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit * elements of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of the 32-bit elements * of Rs1 with the bottom 16-bit content of the 32-bit elements of Rs2. * The subtraction result is written to the corresponding 32-bit element of Rd. The 16-bit contents of multiplication are * treated as signed integers. 
* * **Operations**:\n * ~~~ * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]); x = 1...0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DSMXDS(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dsmxds %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DSMXDS ===== */ /* ===== Inline Function Start for DSMBB32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DSMBB32 (Signed Multiply Bottom Word & Bottom Word) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DSMBB32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register and write the 64-bit result to a third register. * * DSMBB32: bottom*bottom * * **Description**:\n * This instruction multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit element of Rs2. The 64-bit multiplication result is written to Rd. * The 32-bit contents of Rs1 and Rs2 are treated as signed integers. 
* * **Operations**:\n * ~~~ * res = (Rs1.W[0] * Rs2.W[0]); * Rd = res; * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_DSMBB32(unsigned long long a, unsigned long long b) { long long result; __ASM volatile("dsmbb32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DSMBB32 ===== */ /* ===== Inline Function Start for DSMBB32.sra14 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DSMBB32.sra14 (Signed Crossed Multiply Two Halfs and Subtract with Right Shift 14) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DSMBB32.sra14 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register, then right shift 14- * bit,finally write the 64-bit result to a third register. * * DSMBB32.sra14: bottom*bottom s>> 14 * * **Description**:\n * This instruction multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit element of Rs2. The 64-bit multiplication result is written to Rd after right shift 14-bit. * The 32-bit contents of Rs1 and Rs2 are treated as signed integers. 
* * **Operations**:\n * ~~~ * res = (Rs1.W[0] * Rs2.W[0]) s>> 14; * Rd = res; * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_DSMBB32_SRA14(unsigned long long a, unsigned long long b) { long long result; __ASM volatile("dsmbb32.sra14 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DSMBB32.sra14 ===== */ /* ===== Inline Function Start for DSMBB32.sra32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DSMBB32.sra32 (Signed Crossed Multiply Two Halfs and Subtract with Right Shift 32) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DSMBB32.sra32 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register, then right shift 32- * bit,finally write the 64-bit result to a third register. * * DSMBB32.sra32: bottom*bottom s >> 32 * * **Description**:\n * This instruction multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit element of Rs2. * The 64-bit multiplication result is written to Rd after right shift 32-bit. The 32-bit contents of Rs1 and Rs2 are treated as signed integers. 
* * **Operations**:\n * ~~~ * res = (Rs1.W[0] * Rs2.W[0]) s>> 32; * Rd = res; * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_DSMBB32_SRA32(unsigned long long a, unsigned long long b) { long long result; __ASM volatile("dsmbb32.sra32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DSMBB32.sra32 ===== */ /* ===== Inline Function Start for DSMBT32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief SMBT32 (Signed Multiply Bottom Word & Top Word) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DSMBT32 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register and write the 64-bit * result to a third register. * * DSMBT32: bottom*top * * **Description**:\n * This instruction multiplies the bottom 32-bit element of Rs1 with the top 32-bit element of Rs2. The 64-bit multiplication * result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed integers. 
* * **Operations**:\n * ~~~ * res = (Rs1.W[0] * Rs2.W[0]); * Rd = res; * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_DSMBT32(unsigned long long a, unsigned long long b) { long long result; __ASM volatile("dsmbt32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DSMBT32 ===== */ /* ===== Inline Function Start for DSMBT32.sra14 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DSMBT32.sra14 (Signed Multiply Bottom Word & Top Word with Right Shift 14) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DSMBT32.sra14 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register, then right shift 14- * bit,finally write the 64-bit result to a third register. * * DSMBT32.sra14: bottom*bottom s>> 14 * * **Description**:\n * This instruction multiplies the bottom 32-bit element of Rs1 with the top 32-bit element of Rs2. The 64-bit multiplication * result is written to Rd after right shift 14-bit. The 32-bit contents of Rs1 and Rs2 are treated as signed integers. 
* * **Operations**:\n * ~~~ * res = (Rs1.W[0] * Rs2.W[0]) s>> 14; * Rd = res; * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_DSMBT32_SRA14(unsigned long long a, unsigned long long b) { long long result; __ASM volatile("dsmbt32.sra14 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DSMBT32.sra14 ===== */ /* ===== Inline Function Start for DSMBT32.sra32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DSMBT32.sra32 (Signed Crossed Multiply Two Halfs and Subtract with Right Shift 32) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DSMBT32.sra32 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register, then right shift 32- * bit,finally write the 64-bit result to a third register. * * DSMBT32.sra32: bottom*bottom s>> 32 * * **Description**:\n * This instruction multiplies the bottom 32-bit element of Rs1 with the top 32-bit element of Rs2. The 64-bit multiplication * result is written to Rd after right shift 32-bit. The 32-bit contents of Rs1 and Rs2 are treated as signed integers. 
* * **Operations**:\n * ~~~ * res = (Rs1.W[0] * Rs2.W[0]) s>> 14; * Rd = res; * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_DSMBT32_SRA32(unsigned long long a, unsigned long long b) { long long result; __ASM volatile("dsmbt32.sra32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DSMBT32.sra32 ===== */ /* ===== Inline Function Start for DSMTT32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DSMTT32 (Signed Multiply Top Word & Top Word) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DSMTT32 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register and write the 64-bit * result to a third register. * * DSMTT32: top*top * * **Description**:\n * This instruction multiplies the top 32-bit element of Rs1 with the top 32-bit element of Rs2. The 64-bit multiplication * result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed integers. 
* * **Operations**:\n * ~~~ * res = Rs1.W[1] * Rs2.W[1]; * Rd = res; * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_DSMTT32(unsigned long long a, unsigned long long b) { long long result; __ASM volatile("dsmtt32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DSMTT32 ===== */ /* ===== Inline Function Start for DSMTT32.sra14 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DSMTT32.sra14 (Signed Multiply Top Word & Top Word with Right Shift 14-bit) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DSMTT32.sra14 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register,then right shift 14-bit, * finally write the 64-bit result to a third register. * * DSMTT32.sra14: top*top s>> 14 * * **Description**:\n * This instruction multiplies the top 32-bit element of Rs1 with the top 32-bit element of Rs2. The 64-bit multiplication * result is written to Rd after right shift 14-bit. The 32-bit contents of Rs1 and Rs2 are treated as signed integers. 
* * **Operations**:\n * ~~~ * res = Rs1.W[1] * Rs2.W[1] >> 14; * Rd = res; * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_DSMTT32_SRA14(unsigned long long a, unsigned long long b) { long long result; __ASM volatile("dsmtt32.sra14 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DSMTT32.sra14 ===== */ /* ===== Inline Function Start for DSMTT32.sra32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DSMTT32.sra32 (Signed Multiply Top Word & Top Word with Right Shift 32-bit) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DSMTT32.sra32 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register,then right shift 32-bit, * finally write the 64-bit result to a third register. * * DSMTT32.sra32: top*top s>> 32 * * **Description**:\n * This instruction multiplies the top 32-bit element of Rs1 with the top 32-bit element of Rs2. The 64-bit multiplication * result is written to Rd after right shift 32-bit. The 32-bit contents of Rs1 and Rs2 are treated as signed integers. 
* * **Operations**:\n * ~~~ * res = Rs1.W[1] * Rs2.W[1] >> 32; * Rd = res; * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_DSMTT32_SRA32(unsigned long long a, unsigned long long b) { long long result; __ASM volatile("dsmtt32.sra32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DSMTT32.sra32 ===== */ /* ===== Inline Function Start for DPKBB32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DPKBB32 (Pack Two 32-bit Data from Both Bottom Half) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DPKBB32 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Pack 32-bit data from 64-bit chunks in two registers. * * DPKBB32: bottom.bottom * * **Description**:\n * This instruction moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0]. * * **Operations**:\n * ~~~ * Rd = CONCAT(Rs1.W[0], Rs2.W[0]); * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DPKBB32(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dpkbb32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DPKBB32 ===== */ /* ===== Inline Function Start for DPKBT32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DPKBT32 (Pack Two 32-bit Data from Bottom and Top Half) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DPKBT32 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Pack 32-bit data from 64-bit chunks in two registers. 
* * DPKBT32: bottom.top * * **Description**:\n * This instruction moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0]. * * **Operations**:\n * ~~~ * Rd = CONCAT(Rs1.W[0], Rs2.W[1]); * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DPKBT32(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dpkbt32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DPKBT32 ===== */ /* ===== Inline Function Start for DPKTT32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DPKTT32 (Pack Two 32-bit Data from Both Top Half) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DPKTT32 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Pack 32-bit data from 64-bit chunks in two registers. * * DPKTT32: top.top * * **Description**:\n * This instruction moves Rs1.W[1] to Rd.W[0] and moves Rs2.W[1] to Rd.W[0]. 
* * **Operations**:\n * ~~~ * Rd = CONCAT(Rs1.W[1], Rs2.W[1]); * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DPKTT32(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dpktt32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DPKTT32 ===== */ /* ===== Inline Function Start for DPKTB32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DPKTB32 (Pack Two 32-bit Data from Top and Bottom Half) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DPKTB32 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Pack 32-bit data from 64-bit chunks in two registers. * * DPKTB32: top.bottom * * **Description**:\n * This instruction moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0]. * * **Operations**:\n * ~~~ * Rd = CONCAT(Rs1.W[1], Rs2.W[0]); * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DPKTB32(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dpktb32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DPKTB32 ===== */ /* ===== Inline Function Start for DPKTB16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DPKTB16 (Pack Two 32-bit Data from Top and Bottom Half) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DPKTB16 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Pack 16-bit data from 32-bit chunks in two registers. 
* * DPKTB16: top.bottom * * **Description**:\n * This instruction moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0]. * * **Operations**:\n * ~~~ * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][15:0]); * x=1...0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DPKTB16(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dpktb16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DPKTB16 ===== */ /* ===== Inline Function Start for DPKBB16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DPKBB16 (Pack Two 16-bit Data from Both Bottom Half) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DPKBB16 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Pack 16-bit data from 32-bit chunks in two registers. * * PKBB16: bottom.bottom * * **Description**:\n * This instruction moves Rs1.W[x][15:0] to Rd.W[x][31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0]. 
* * **Operations**:\n * ~~~ * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][15:0]); * x=1...0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DPKBB16(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dpkbb16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DPKBB16 ===== */ /* ===== Inline Function Start for DPKBT16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DPKBT16 (Pack Two 16-bit Data from Bottom and Top Half) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DPKBT16 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Pack 16-bit data from 32-bit chunks in two registers. * * PKBT16: bottom.top * * **Description**:\n * This instruction moves Rs1.W[x] [15:0] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0]. * * **Operations**:\n * ~~~ * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][31:16]); * x=1...0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DPKBT16(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dpkbt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DPKBT16 ===== */ /* ===== Inline Function Start for DPKTT16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DPKTT16 (Pack Two 16-bit Data from Both Top Half) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DPKTT16 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Pack 16-bit data from 32-bit chunks in two registers. 
* * PKTT16 top.top * * **Description**:\n * This instruction moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0]. * * **Operations**:\n * ~~~ * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][31:16]); * x=1...0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DPKTT16(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dpktt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DPKTT16 ===== */ /* ===== Inline Function Start for DSRA16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DSRA16 (SIMD 16-bit Shift Right Arithmetic) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DSRA16 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do 16-bit element arithmetic right shift operations simultaneously. The shift amount is a variable from a GPR. * * **Description**:\n * The 16-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out bits are filled with the sign-bit of * the data elements. The shift amount is specified by the low-order 4-bits of the value in the Rs2 register. And the results * are written to Rd. 
* * **Operations**:\n * ~~~ * sa = Rs2[3:0]; * if (sa != 0) * { * Rd.H[x] = SE16(Rs1.H[x][15:sa]); * } else { * Rd = Rs1; * } * x=3...0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DSRA16(unsigned long long a, unsigned long b) { unsigned long long result; __ASM volatile("dsra16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DSRA16 ===== */ /* ===== Inline Function Start for DADD16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DADD16 (16-bit Addition) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DADD16 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do 16-bit integer element additions simultaneously. * * **Description**:\n * This instruction adds the 16-bit unsigned integer elements in Rs1 with the 16-bit unsigned integer elements in Rs2. And * the results are written to Rd. * * **Operations**:\n * ~~~ * Rd.H[x] = Rs1.H[x] + Rs2.H[x]; * x=3...0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DADD16(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dadd16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DADD16 ===== */ /* ===== Inline Function Start for DADD32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DADD32 (32-bit Addition) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DADD32 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do 32-bit integer element additions simultaneously. 
* * **Description**:\n * This instruction adds the 32-bit integer elements in Rs1 with the 32-bit integer elements in Rs2, and then writes the 32-bit * element results to Rd. * * **Operations**:\n * ~~~ * Rd.W[x] = Rs1.W[x] + Rs2.W[x]; * x=1...0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DADD32(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dadd32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DADD32 ===== */ /* ===== Inline Function Start for DSMBB16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DSMBB16 (Signed Multiply Bottom Half & Bottom Half) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DSMBB16 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Multiply the signed 16-bit content of the 32-bit elements of a register with the signed 16-bit content of the 32-bit elements * of another register and write the result to a third register. * * DSMBB16: W[x].bottom*W[x].bottom * * **Description**:\n * For the `DSMBB16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom * 16-bit content of the 32-bit elements of Rs2. * The multiplication results are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed integers. 
* * **Operations**:\n * ~~~ * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[0]; * x=1...0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DSMBB16(unsigned long long a, unsigned long long b) /* pass */ { unsigned long long result; __ASM volatile("dsmbb16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DSMBB16 ===== */ /* ===== Inline Function Start for DSMBT16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DSMBT16 (Signed Multiply Bottom Half & Top Half) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DSMBT16 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Multiply the signed 16-bit content of the 32-bit elements of a register with the signed 16-bit content of the 32-bit * elements of another register and write the result to a third register. * * DSMBT16: W[x].bottom *W[x].top * * **Description**:\n * For the `DSMBT16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with the top 16-bit * content of the 32-bit elements of Rs2. * The multiplication results are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed integers. 
* * **Operations**:\n * ~~~ * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[1]; * x=1...0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DSMBT16(unsigned long long a, unsigned long long b) /* pass */ { unsigned long long result; __ASM volatile("dsmbt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DSMBT16 ===== */ /* ===== Inline Function Start for DSMTT16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DSMTT16 (Signed Multiply Top Half & Top Half) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DSMTT16 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Multiply the signed 16-bit content of the 32-bit elements of a register with the signed 16-bit content of the 32-bit * elements of another register and write the result to a third register. * * DSMTT16: W[x].top * W[x].top * * **Description**:\n * For the `DSMTT16` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit * content of the 32-bit elements of Rs2. * The multiplication results are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed integers. 
* * **Operations**:\n * ~~~ * Rd.W[x] = Rs1.W[x].H[1] * Rs2.W[x].H[1]; * x=1...0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DSMTT16(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dsmtt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DSMTT16 ===== */ /* ===== Inline Function Start for DRCRSA16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DRCRSA16 (16-bit Signed Halving Cross Subtraction & Addition) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DRCRSA16 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do 16-bit signed integer element subtraction and 16-bit signed integer element addition in a 32-bit chunk simultaneously. * Operands are from crossed positions in 32-bit chunks. The results are halved to avoid overflow or saturation. * * **Description**:\n * This instruction subtracts the 16-bit signed integer in [31:16] of 32-bit chunks in Rs1 with the 16-bit signed integer in * [15:0] of 32-bit chunks in Rs2, and adds the 16-bit signed integer in [31:16] of 32-bit chunks in Rs2 from the 16-bit signed * integer in [15:0] of 32-bit chunks in Rs1. The element results are first logically right-shifted by 1 bit and then written to * [31:16] of 32- bit chunks in Rd and [15:0] of 32-bit chunks in Rd. 
* * **Operations**:\n * ~~~ * Rd.W[x][31:16] = (Rs1.W[x][31:16] - Rs2.W[x][15:0]) s>> 1; * Rd.W[x][15:0] = (Rs1.W[x][15:0] + Rs2.W[x][31:16]) s>> 1; * x=1...0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DRCRSA16(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("drcrsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DRCRSA16 ===== */ /* ===== Inline Function Start for DRCRSA32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DRCRSA32 (32-bit Signed Halving Cross Subtraction & Addition) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DRCRSA32 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do 32-bit signed integer element subtraction and 32-bit signed integer element addition in a 64-bit chunk simultaneously. * Operands are from crossed 32-bit elements. The results are halved to avoid overflow or saturation. * * **Description**:\n * This instruction subtracts the 32-bit signed integer element in [63:32] of Rs1 with the 32-bit signed integer element in * [31:0] of Rs2, and adds the 32-bit signed integer element in [63:32] of Rs2 from the 32-bit signed integer element in [31:0] * of Rs1. The element results are first arithmetically right-shifted by 1 bit and then written to [63:32] of Rd for addition and * [31:0] of Rd for subtraction. 
* * **Operations**:\n * ~~~ * Rd.W[1] = (Rs1.W[1] - Rs2.W[0]) s>> 1; * Rd.W[0] = (Rs1.W[0] + Rs2.W[1]) s>> 1; * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DRCRSA32(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("drcrsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DRCRSA32 ===== */ /* ===== Inline Function Start for DRCRAS16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DRCRAS16 (16-bit Signed Halving Cross Addition & Subtraction) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DRCRAS16 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do 16-bit signed integer element subtraction and 16-bit signed integer element addition in a 32-bit chunk simultaneously. * Operands are from crossed positions in 32-bit chunks. The results are halved to avoid overflow or saturation. * * **Description**:\n * This instruction adds the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs1 with the 16-bit unsigned integer in * [15:0] of 32-bit chunks in Rs2, and subtracts the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs2 from the 16-bit * unsigned integer in [15:0] of 32-bit chunks in Rs1. The element results are first logically right-shifted by 1 bit and then * written to [31:16] of 32-bit chunks in Rd and [15:0] of 32-bit chunks in Rd. 
* * **Operations**:\n * ~~~ * Rd.W[x][31:16] = (Rs1.W[x][31:16] + Rs2.W[x][15:0]) s>> 1; * Rd.W[x][15:0] = (Rs1.W[x][15:0] - Rs2.W[x][31:16]) s>> 1; * x=1...0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DRCRAS16(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("drcras16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DRCRAS16 ===== */ /* ===== Inline Function Start for DRCRAS32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DRCRAS32 (32-bit Signed Cross Addition & Subtraction) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DRCRAS32 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do 32-bit signed integer element addition and 32-bit signed integer element subtraction in a 64-bit chunk simultaneously. * Operands are from crossed 32-bit elements. The results are halved to avoid overflow or saturation. * * **Description**:\n * This instruction adds the 32-bit signed integer element in [63:32] of Rs1 with the 32-bit signed integer element in [31:0] * of Rs2, and subtracts the 32-bit signed integer element in [63:32] of Rs2 from the 32-bit signed integer element in [31:0] * of Rs1. The element results are first arithmetically right-shifted by 1 bit and then written to [63:32] of Rd for addition * and [31:0] of Rd for subtraction. 
* * **Operations**:\n * ~~~ * Rd.W[1] = (Rs1.W[1] + Rs2.W[0]) s>> 1; * Rd.W[0] = (Rs1.W[0] - Rs2.W[1]) s>> 1; * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DRCRAS32(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("DRCRAS32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DRCRAS32 ===== */ /* ===== Inline Function Start for DKCRAS16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DKCRAS16 (16-bit Signed Saturating Cross Addition & Subtraction) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DKCRAS16 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do 16-bit signed integer element saturating addition and 16-bit signed integer element saturating subtraction in a 32-bit * chunk simultaneously. Operands are from crossed positions in 32-bit chunks. * * **Description**:\n * This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1 with the 16-bit signed integer * element in [15:0] of 32-bit chunks in Rs2; at the same time, it subtracts the 16-bit signed integer element in [31:16] of * 32-bit chunks in Rs2 from the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs1. * If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV * bit is set to 1. The saturated results are written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of 32-bit chunks * in Rd for addition. 
* * **Operations**:\n * ~~~ * res1 = Rs1.W[x][31:16] - Rs2.W[x][15:0]; * res2 = Rs1.W[x][15:0] + Rs2.W[x][31:16]; * for (res in [res1, res2]) { * if (res > (2^15)-1) { * res = (2^15)-1; * OV = 1; * } else if (res < -2^15) { * res = -2^15; * OV = 1; * } * } * Rd.W[x][31:16] = res1; * Rd.W[x][15:0] = res2; * x=1...0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DKCRAS16(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dkcras16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DKCRAS16 ===== */ /* ===== Inline Function Start for DKCRSA16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DKCRSA16 (16-bit Signed Saturating Cross Subtraction & Addition) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DKCRSA16 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do 16-bit signed integer element saturating subtraction and 16-bit signed integer element saturating addition in a 32-bit * chunk simultaneously. Operands are from crossed positions in 32-bit chunks. * * **Description**:\n * This instruction subtracts the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs2 from the 16-bit signed integer * element in [31:16] of 32-bit chunks in Rs1; at the same time, it adds the 16-bit signed integer element in [31:16] of 32-bit * chunks in Rs2 with the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs1. * If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV * bit is set to 1. The saturated results are written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit chunks * in Rd for subtraction. 
* * **Operations**:\n * ~~~ * res1 = Rs1.W[x][31:16] + Rs2.W[x][15:0]; * res2 = Rs1.W[x][15:0] - Rs2.W[x][31:16]; * for (res in [res1, res2]) { * if (res > (2^15)-1) { * res = (2^15)-1; * OV = 1; * } else if (res < -2^15) { * res = -2^15; * OV = 1; * } * } * Rd.W[x][31:16] = res1; * Rd.W[x][15:0] = res2; * x=1...0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DKCRSA16(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dkcrsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DKCRSA16 ===== */ /* ===== Inline Function Start for DRSUB16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DRSUB16 (16-bit Signed Halving Subtraction) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DRSUB16 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do 16-bit signed integer element subtractions simultaneously. The results are halved to avoid overflow or saturation. * * **Description**:\n * This instruction subtracts the 16-bit signed integer elements in Rs2 from the 16-bit signed integer elements in Rs1. The * results are first arithmetically right-shifted by 1 bit and then written to Rd. 
* * **Operations**:\n * ~~~ * Rd.H[x] = (Rs1.H[x] - Rs2.H[x]) s>> 1; * x=3...0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DRSUB16(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("drsub16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DRSUB16 ===== */ /* ===== Inline Function Start for DSTSA32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DSTSA32 (32-bit Straight Subtraction & Addition) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DSTSA32 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do 32-bit integer element subtraction and 32-bit integer element addition in a 64-bit chunk simultaneously. Operands are * from corresponding 32-bit elements. * * **Description**:\n * This instruction subtracts the 32-bit integer element in [63:32] of Rs2 from the 32-bit integer element in [63:32] of Rs1, * and writes the result to [63:32] of Rd; at the same time, it adds the 32-bit integer element in [31:0] of Rs1 with the 32-bit * integer element in [31:0] of Rs2, and writes the result to [31:0] of Rd. 
* * **Operations**:\n * ~~~ * Rd.W[1] = Rs1.W[1] - Rs2.W[1]; * Rd.W[0] = Rs1.W[0] + Rs2.W[0]; * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DSTSA32(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dstsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DSTSA32 ===== */ /* ===== Inline Function Start for DSTAS32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DSTAS32 (SIMD 32-bit Straight Addition & Subtractionn) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DSTAS32 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do 32-bit integer element addition and 32-bit integer element subtraction in a 64-bit chunk simultaneously. Operands are * from corresponding 32-bit elements. * * **Description**:\n * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit integer element in [63:32] of Rs2, * and writes the result to [63:32] of Rd; at the same time, it subtracts the 32-bit integer element in [31:0] of Rs2 * from the 32-bit integer element in [31:0] of Rs1, and writes the result to [31:0] of Rd. 
* * **Operations**:\n * ~~~ * Rd.W[1] = Rs1.W[1] + Rs2.W[1]; * Rd.W[0] = Rs1.W[0] - Rs2.W[0]; * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DSTAS32(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("DSTAS32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DSTAS32 ===== */ /* ===== Inline Function Start for DKCRSA32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DKCRSA32 (32-bit Signed Saturating Cross Subtraction & Addition) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DKCRSA32 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do 32-bit signed integer element saturating subtraction and 32-bit signed integer element saturating addition in a 64-bit * chunk simultaneously. Operands are from crossed 32-bit elements. * * **Description**:\n * This instruction subtracts the 32-bit integer element in [31:0] of Rs2 from the 32-bit integer element in [63:32] of Rs1; at * the same time, it adds the 32-bit integer element in [31:0] of Rs1 with the 32-bit integer element in [63:32] of Rs2. If any * of the results are beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), they are saturated to the range and the OV bit is * set to 1. The saturated results are written to [63:32] of Rd for subtraction and [31:0] of Rd for addition. 
* * **Operations**:\n * ~~~ * res[1] = Rs1.W[1] - Rs2.W[0]; * res[0] = Rs1.W[0] + Rs2.W[1]; * if (res[x] > (2^31)-1) { * res[x] = (2^31)-1; * OV = 1; * } else if (res < -2^31) { * res[x] = -2^31; * OV = 1; * } * Rd.W[1] = res[1]; * Rd.W[0] = res[0]; * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DKCRSA32(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dkcrsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DKCRSA32 ===== */ /* ===== Inline Function Start for DKCRAS32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DKCRAS32 (32-bit Signed Saturating Cross Addition & Subtraction) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DKCRAS32 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do 32-bit signed integer element saturating subtraction and 32-bit signed integer element saturating addition in a 64-bit * chunk simultaneously. Operands are from crossed 32-bit elements. * * **Description**:\n * This instruction adds the 32-bit integer element in [31:0] of Rs2 from the 32-bit integer element in [63:32] of Rs1; at the * same time, it subtracts the 32-bit integer element in [31:0] of Rs1 with the 32-bit integer element in [63:32] of Rs2. If any * of the results are beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), they are saturated to the range and the OV bit is * set to 1. The saturated results are written to [63:32] of Rd for subtraction and [31:0] of Rd for addition. 
* * **Operations**:\n * ~~~ * res[1] = Rs1.W[1] + Rs2.W[0]; * res[0] = Rs1.W[0] - Rs2.W[1]; * if (res[x] > (2^31)-1) { * res[x] = (2^31)-1; * OV = 1; * } else if (res < -2^31) { * res[x] = -2^31; * OV = 1; * } * Rd.W[1] = res[1]; * Rd.W[0] = res[0]; * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DKCRAS32(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dkcras32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DKCRAS32 ===== */ /* ===== Inline Function Start for DCRSA32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DCRSA32 (32-bit Cross Subtraction & Addition) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DCRSA32 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do 32-bit integer element subtraction and 32-bit integer element addition in a 64-bit chunk simultaneously. Operands are * from crossed 32-bit elements. * * **Description**:\n * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit integer element in [31:0] of Rs2, and * writes the result to [63:32] of Rd; at the same time, it subtracts the 32-bit integer element in [63:32] of Rs2 from the 32-bit * integer element in [31:0] of Rs1, and writes the result to [31:0] of Rd. 
* * **Operations**:\n * ~~~ * res[1] = Rs1.W[1] - Rs2.W[0]; * res[0] = Rs1.W[0] + Rs2.W[1]; * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DCRSA32(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dcrsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DCRSA32 ===== */ /* ===== Inline Function Start for DCRAS32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DCRAS32 (32-bit Cross Addition & Subtraction) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DCRAS32 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do 32-bit integer element addition and 32-bit integer element subtraction in a 64-bit chunk simultaneously. Operands are * from crossed 32-bit elements. * * **Description**:\n * This instruction subtracts the 32-bit integer element in [63:32] of Rs1 with the 32-bit integer element in [31:0] of Rs2, and * writes the result to [63:32] of Rd; at the same time, it adds the 32-bit integer element in [63:32] of Rs2 from the 32-bit * integer element in [31:0] of Rs1, and writes the result to [31:0] of Rd. 
* * **Operations**:\n * ~~~ * res[1] = Rs1.W[1] - Rs2.W[0]; * res[0] = Rs1.W[0] + Rs2.W[1]; * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DCRAS32(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dcras32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DCRAS32 ===== */ /* ===== Inline Function Start for DKSTSA16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DKSTSA16 (16-bit Signed Saturating Straight Subtraction & Addition) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DKSTSA16 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do 16-bit signed integer element saturating subtraction and 16-bit signed integer element saturating addition in a 32-bit * chunk simultaneously. Operands are from corresponding positions in 32-bit chunks. * * **Description**:\n * This instruction subtracts the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs2 from the 16-bit signed integer * element in [31:16] of 32-bit chunks in Rs1; at the same time, it adds the 16-bit signed integer element in [15:0] of 32-bit * chunks in Rs2 with the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs1. * If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV * bit is set to 1. The saturated results are written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of 32-bit chunks * in Rd for addition. 
* * **Operations**:\n * ~~~ * res1 = Rs1.W[x][31:16] - Rs2.W[x][31:16]; * res2 = Rs1.W[x][15:0] + Rs2.W[x][15:0]; * for (res in [res1, res2]) { * if (res > (2^15)-1) { * res = (2^15)-1; * OV = 1; * } else if (res < -2^15) { * res = -2^15; * OV = 1; * } * } * Rd.W[x][31:16] = res1; * Rd.W[x][15:0] = res2; * x=1...0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DKSTSA16(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dkstsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DKSTSA16 ===== */ /* ===== Inline Function Start for DKSTAS16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DKSTAS16 (16-bit Signed Saturating Straight Addition & Subtraction) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DKSTAS16 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do 16-bit signed integer element saturating addition and 16-bit signed integer element saturating subtraction in a 32-bit * chunk simultaneously. Operands are from corresponding positions in 32-bit chunks. * * **Description**:\n * This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1 with the 16-bit signed integer * element in [31:16] of 32-bit chunks in Rs2; at the same time, it subtracts the 16-bit signed integer element in [15:0] of * 32-bit chunks in Rs2 from the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs1. * If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV * bit is set to 1. The saturated results are written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of 32-bit chunks * in Rd for addition. 
* * **Operations**:\n * ~~~ * res1 = Rs1.W[x][31:16] + Rs2.W[x][31:16]; * res2 = Rs1.W[x][15:0] - Rs2.W[x][15:0]; * for (res in [res1, res2]) { * if (res > (2^15)-1) { * res = (2^15)-1; * OV = 1; * } else if (res < -2^15) { * res = -2^15; * OV = 1; * } * } * Rd.W[x][31:16] = res1; * Rd.W[x][15:0] = res2; * x=1...0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DKSTAS16(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("dkstas16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DKSTAS16 ===== */ /* ===== Inline Function Start for DSCLIP8 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DSCLIP8 (8-bit Signed Saturation and Clip) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DSCLIP8 Rd, Rs1, imm3u[2:0] * # Rd, Rs1 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Limit the 8-bit signed integer elements of a register into a signed range simultaneously. * * **Description**:\n * This instruction limits the 8-bit signed integer elements stored in Rs1 into a signed integer range between -2^imm3u and * 2^imm3u-1, and writes the limited results to Rd. For example, if imm3u is 3, the 8-bit input values should be saturated * between 7 and -8. If saturation is performed, set OV bit to 1. 
* * **Operations**:\n * ~~~ * src = Rs1.B[x]; * if (src > (2^imm3u)-1) { * src = (2^imm3u)-1; * OV = 1; * } else if (src < -2^imm3u) { * src = -2^imm3u; * OV = 1; * } * Rd.B[x] = src * x=7...0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ #define __RV_DSCLIP8(a, b) \ ({ \ unsigned long long result; \ unsigned long long __a = (unsigned long long)(a); \ __ASM volatile("dsclip8 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ result; \ }) /* ===== Inline Function End for DSCLIP8 ===== */ /* ===== Inline Function Start for DSCLIP16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DSCLIP16 (16-bit Signed Saturation and Clip) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DSCLIP16 Rd, Rs1, imm4u[3:0] * # Rd, Rs1 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Limit the 16-bit signed integer elements of a register into a signed range simultaneously. * * **Description**:\n * This instruction limits the 16-bit signed integer elements stored in Rs1 into a signed integer range between -2^imm4u and * 2^imm4u-1, and writes the limited results to Rd. For example, if imm4u is 3, the 32-bit input values should be saturated * between 7 and -8. If saturation is performed, set OV bit to 1. 
* * **Operations**:\n * ~~~ * src = Rs1.H[x]; * if (src > (2^imm4u)-1) { * src = (2^imm4u)-1; * OV = 1; * } else if (src < -2^imm4u) { * src = -2^imm4u; * OV = 1; * } * Rd.H[x] = src * x=3...0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ #define __RV_DSCLIP16(a, b) \ ({ \ unsigned long long result; \ unsigned long long __a = (unsigned long long)(a); \ __ASM volatile("dsclip16 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ result; \ }) /* ===== Inline Function End for DSCLIP16 ===== */ /* ===== Inline Function Start for DSCLIP32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DSCLIP32 (32-bit Signed Saturation and Clip) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DSCLIP32 Rd, Rs1, imm5u[4:0] * # Rd, Rs1 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Limit the 32-bit signed integer elements of a register into a signed range simultaneously. * * **Description**:\n * This instruction limits the 32-bit signed integer elements stored in Rs1 into a signed integer range between -2^imm5u and * 2^imm5u-1, and writes the limited results to Rd. For example, if imm5u is 3, the 32-bit input values should be saturated * between 7 and -8. If saturation is performed, set OV bit to 1. 
* * **Operations**:\n * ~~~ * src = Rs1.W[x]; * if (src > (2^imm5u)-1) { * src = (2^imm5u)-1; * OV = 1; * } else if (src < -2^imm5u) { * src = -2^imm5u; * OV = 1; * } * Rd.W[x] = src * x=1...0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ #define __RV_DSCLIP32(a, b) \ ({ \ unsigned long long result; \ unsigned long long __a = (unsigned long long)(a); \ __ASM volatile("dsclip32 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ result; \ }) /* ===== Inline Function End for DSCLIP32 ===== */ /* ===== Inline Function Start for DRSUB32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DRSUB32 (32-bit Signed Halving Subtraction) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DRSUB32 Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do 32-bit signed integer element subtractions simultaneously. The results are halved to avoid overflow or saturation. * * **Description**:\n * This instruction subtracts the 32-bit signed integer elements in Rs2 from the 32-bit signed integer elements in Rs1. The * results are first arithmetically right-shifted by 1 bit and then written to Rd. 
* * **Operations**:\n * ~~~ * Rd.W[x] = (Rs1.W[x] - Rs2.W[x]) s>> 1; * x=1...0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DRSUB32(unsigned long long a, unsigned long long b) { unsigned long long result; __ASM volatile("drsub32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DRSUB32 ===== */ /* ===== Inline Function Start for DPACK32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DPACK32 (SIMD Pack Two 32-bit Data To 64-bit) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DPACK32 Rd, Rs1, Rs2 * # Rd is even/odd pair of register * ~~~ * * **Purpose**:\n * Pack two 32-bit datas which from two registers into a 64-bit data. * * **Description**:\n * This instruction moves 32-bit Rs1 to Rd.W[1] and moves 32-bit Rs2 to Rd.W[0]. * * **Operations**:\n * ~~~ * Rd = CONCAT(Rs1.W , Rs2.W); * ~~~ * * \param [in] a signed long type of value stored in a * \param [in] b signed long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DPACK32(signed long a, signed long b) { unsigned long long result; __ASM volatile("dpack32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DPACK32 ===== */ /* ===== Inline Function Start for DSUNPKD810 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DSUNPKD810 (Signed Unpacking Bytes 1 & 0) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DSUNPKD810 Rd, Rs1 * # Rd, Rs1 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Unpack byte 1 and byte 0 of 32-bit chunks in a register into two 16-bit signed halfwords of 32-bit chunks in a register. 
* * **Description**:\n * For the `DSUNPKD810` instruction, it unpacks byte 1 and byte 0 of 32-bit chunks in Rs1 into two 16-bit signed halfwords * and writes the results to the top part and the bottom part of 32-bit chunks in Rd. * * **Operations**:\n * ~~~ * Rd.W[m].H[1] = SE16(Rs1.W[m].B[1]) * Rd.W[m].H[0] = SE16(Rs1.W[m].B[0]) * ~~~ * * \param [in] a unsigned long long type of value stored in a * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DSUNPKD810(unsigned long long a) { unsigned long long result; __ASM volatile("dsunpkd810 %0, %1" : "=r"(result) : "r"(a)); return result; } /* ===== Inline Function End for DSUNPKD810 ===== */ /* ===== Inline Function Start for DSUNPKD820 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DSUNPKD820 (Signed Unpacking Bytes 2 & 0) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DSUNPKD820 Rd, Rs1 * # Rd, Rs1 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Unpack byte 2 and byte 0 of 32-bit chunks in a register into two 16-bit signed halfwords of 32-bit chunks in a register. * * **Description**:\n * For the `DSUNPKD820` instruction, it unpacks byte 2 and byte 0 of 32-bit chunks in Rs1 into two 16-bit signed halfwords * and writes the results to the top part and the bottom part of 32-bit chunks in Rd. 
*
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = SE16(Rs1.W[m].B[2])
 * Rd.W[m].H[0] = SE16(Rs1.W[m].B[0])
 * ~~~
 *
 * \param [in] a 64-bit source operand for Rs1, passed as unsigned long long
 * \return 64-bit value produced in Rd, as unsigned long long
 */
__STATIC_FORCEINLINE unsigned long long __RV_DSUNPKD820(unsigned long long a)
{
    unsigned long long rd;

    __ASM volatile("dsunpkd820 %0, %1"
                   : "=r"(rd)
                   : "r"(a));
    return rd;
}
/* ===== Inline Function End for DSUNPKD820 ===== */

/* ===== Inline Function Start for DSUNPKD830 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief DSUNPKD830 (Signed Unpacking Bytes 3 & 0)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSUNPKD830 Rd, Rs1
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte 3 and byte 0 of 32-bit chunks in a register into two 16-bit signed halfwords of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `DSUNPKD830` instruction, it unpacks byte 3 and byte 0 of 32-bit chunks in Rs1 into two 16-bit signed halfwords
 * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
*
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = SE16(Rs1.W[m].B[3])
 * Rd.W[m].H[0] = SE16(Rs1.W[m].B[0])
 * ~~~
 *
 * \param [in] a 64-bit source operand for Rs1, passed as unsigned long long
 * \return 64-bit value produced in Rd, as unsigned long long
 */
__STATIC_FORCEINLINE unsigned long long __RV_DSUNPKD830(unsigned long long a)
{
    unsigned long long rd;

    __ASM volatile("dsunpkd830 %0, %1"
                   : "=r"(rd)
                   : "r"(a));
    return rd;
}
/* ===== Inline Function End for DSUNPKD830 ===== */

/* ===== Inline Function Start for DSUNPKD831 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief DSUNPKD831 (Signed Unpacking Bytes 3 & 1)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSUNPKD831 Rd, Rs1
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte 3 and byte 1 of 32-bit chunks in a register into two 16-bit signed halfwords of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `DSUNPKD831` instruction, it unpacks byte 3 and byte 1 of 32-bit chunks in Rs1 into two 16-bit signed halfwords
 * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
*
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = SE16(Rs1.W[m].B[3])
 * Rd.W[m].H[0] = SE16(Rs1.W[m].B[1])
 * ~~~
 *
 * \param [in] a 64-bit source operand for Rs1, passed as unsigned long long
 * \return 64-bit value produced in Rd, as unsigned long long
 */
__STATIC_FORCEINLINE unsigned long long __RV_DSUNPKD831(unsigned long long a)
{
    unsigned long long rd;

    __ASM volatile("dsunpkd831 %0, %1"
                   : "=r"(rd)
                   : "r"(a));
    return rd;
}
/* ===== Inline Function End for DSUNPKD831 ===== */

/* ===== Inline Function Start for DSUNPKD832 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief DSUNPKD832 (Signed Unpacking Bytes 3 & 2)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSUNPKD832 Rd, Rs1
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte 3 and byte 2 of 32-bit chunks in a register into two 16-bit signed halfwords of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `DSUNPKD832` instruction, it unpacks byte 3 and byte 2 of 32-bit chunks in Rs1 into two 16-bit signed halfwords
 * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
*
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = SE16(Rs1.W[m].B[3])
 * Rd.W[m].H[0] = SE16(Rs1.W[m].B[2])
 * ~~~
 *
 * \param [in] a 64-bit source operand for Rs1, passed as unsigned long long
 * \return 64-bit value produced in Rd, as unsigned long long
 */
__STATIC_FORCEINLINE unsigned long long __RV_DSUNPKD832(unsigned long long a)
{
    unsigned long long rd;

    __ASM volatile("dsunpkd832 %0, %1"
                   : "=r"(rd)
                   : "r"(a));
    return rd;
}
/* ===== Inline Function End for DSUNPKD832 ===== */

/* ===== Inline Function Start for DZUNPKD810 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief DZUNPKD810 (UnSigned Unpacking Bytes 1 & 0)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DZUNPKD810 Rd, Rs1
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte 1 and byte 0 of 32-bit chunks in a register into two 16-bit unsigned halfwords of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `DZUNPKD810` instruction, it unpacks byte 1 and byte 0 of 32-bit chunks in Rs1 into two 16-bit unsigned halfwords
 * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
*
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[1])
 * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[0])
 * ~~~
 *
 * \param [in] a unsigned long long type of value stored in a
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DZUNPKD810(unsigned long long a)
{
    unsigned long long result;
    __ASM volatile("dzunpkd810 %0, %1" : "=r"(result) : "r"(a));
    return result;
}
/* ===== Inline Function End for DZUNPKD810 ===== */

/* ===== Inline Function Start for DZUNPKD820 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief DZUNPKD820 (UnSigned Unpacking Bytes 2 & 0)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DZUNPKD820 Rd, Rs1
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte 2 and byte 0 of 32-bit chunks in a register into two 16-bit unsigned halfwords of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `DZUNPKD820` instruction, it unpacks byte 2 and byte 0 of 32-bit chunks in Rs1 into two 16-bit unsigned halfwords
 * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
*
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[2])
 * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[0])
 * ~~~
 *
 * \param [in] a unsigned long long type of value stored in a
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DZUNPKD820(unsigned long long a)
{
    unsigned long long result;
    __ASM volatile("dzunpkd820 %0, %1" : "=r"(result) : "r"(a));
    return result;
}
/* ===== Inline Function End for DZUNPKD820 ===== */

/* ===== Inline Function Start for DZUNPKD830 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief DZUNPKD830 (UnSigned Unpacking Bytes 3 & 0)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DZUNPKD830 Rd, Rs1
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte 3 and byte 0 of 32-bit chunks in a register into two 16-bit unsigned halfwords of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `DZUNPKD830` instruction, it unpacks byte 3 and byte 0 of 32-bit chunks in Rs1 into two 16-bit unsigned halfwords
 * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
*
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[3])
 * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[0])
 * ~~~
 *
 * \param [in] a unsigned long long type of value stored in a
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DZUNPKD830(unsigned long long a)
{
    unsigned long long result;
    __ASM volatile("dzunpkd830 %0, %1" : "=r"(result) : "r"(a));
    return result;
}
/* ===== Inline Function End for DZUNPKD830 ===== */

/* ===== Inline Function Start for DZUNPKD831 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief DZUNPKD831 (UnSigned Unpacking Bytes 3 & 1)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DZUNPKD831 Rd, Rs1
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte 3 and byte 1 of 32-bit chunks in a register into two 16-bit unsigned halfwords of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `DZUNPKD831` instruction, it unpacks byte 3 and byte 1 of 32-bit chunks in Rs1 into two 16-bit unsigned halfwords
 * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
* * **Operations**:\n * ~~~ * Rd.W[m].H[1] = SE16(Rs1.W[m].B[3]) * Rd.W[m].H[0] = SE16(Rs1.W[m].B[1]) * ~~~ * * \param [in] a unsigned long long type of value stored in a * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DZUNPKD831(unsigned long long a) { unsigned long long result; __ASM volatile("dzunpkd831 %0, %1" : "=r"(result) : "r"(a)); return result; } /* ===== Inline Function End for DZUNPKD831 ===== */ /* ===== Inline Function Start for DZUNPKD832 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 * \brief DZUNPKD832 (UnSigned Unpacking Bytes 3 & 2) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DZUNPKD832 Rd, Rs1 * # Rd, Rs1 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Unpack byte 3 and byte 2 of 32-bit chunks in a register into two 16-bit unsigned halfwords of 32-bit chunks in a register. * * **Description**:\n * For the `DZUNPKD832` instruction, it unpacks byte 3 and byte 2 of 32-bit chunks in Rs1 into two 16-bit unsigned halfwords * and writes the results to the top part and the bottom part of 32-bit chunks in Rd. * * **Operations**:\n * ~~~ * Rd.W[m].H[1] = SE16(Rs1.W[m].B[3]) * Rd.W[m].H[0] = SE16(Rs1.W[m].B[2]) * ~~~ * * \param [in] a unsigned long long type of value stored in a * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DZUNPKD832(unsigned long long a) { unsigned long long result; __ASM volatile("dzunpkd832 %0, %1" : "=r"(result) : "r"(a)); return result; } /* ===== Inline Function End for DZUNPKD832 ===== */ /* ===== Inline Function Start for DKMMAC ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DKMMAC (64-bit MSW 32x32 Signed Multiply and Saturating Add) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DKMMAC Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do MSW 32x32 element signed multiplications and saturating addition simultaneously. 
The results are written into Rd. * * **Description**:\n * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2 * and adds the most significant 32-bit multiplication results with the signed 32-bit elements of Rd. If * the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range * and the OV bit is set to 1. The results after saturation are written to Rd. The .u form of the * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by * adding a 1 to bit 31 of the results. * * **Operations**:\n * ~~~ * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) { * res = sat.q31(dop + (aop s* bop)[63:32]); * } * Rd = concat(rest, resb); * x=0 * ~~~ * * \param [in] t unsigned long long type of value stored in t * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DKMMAC(unsigned long long t, unsigned long long a, unsigned long long b) { __ASM volatile("dkmmac %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for DKMMAC ===== */ /* ===== Inline Function Start for DKMMAC.u ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DKMMAC.u (64-bit MSW 32x32 Unsigned Multiply and Saturating Add) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DKMMAC.u Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do MSW 32x32 element unsigned multiplications and saturating addition simultaneously. The results are written into Rd. 
* * **Description**:\n * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2 * and adds the most significant 32-bit multiplication results with the signed 32-bit elements of Rd. If * the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range * and the OV bit is set to 1. The results after saturation are written to Rd. The .u form of the * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by * adding a 1 to bit 31 of the results. * * **Operations**:\n * ~~~ * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) { * res = sat.q31(dop + RUND(aop u* bop)[63:32]); * } * Rd = concat(rest, resb); * x=0 * ~~~ * * \param [in] t unsigned long long type of value stored in t * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DKMMAC_U(unsigned long long t, unsigned long long a, unsigned long long b) { __ASM volatile("dkmmac.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for DKMMAC.u ===== */ /* ===== Inline Function Start for DKMMSB ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DKMMSB (64-bit MSW 32x32 Signed Multiply and Saturating Sub) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DKMMSB Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do MSW 32x32 element signed multiplications and saturating subtraction simultaneously. The results are written into Rd. 
* * **Description**:\n * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2 * and subtracts the most significant 32-bit multiplication results from the signed 32-bit elements of * Rd. If the subtraction result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the * range and the OV bit is set to 1. The results after saturation are written to Rd. The .u form of the * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by * adding a 1 to bit 31 of the results. * * **Operations**:\n * ~~~ * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) { * res = sat.q31(dop - (aop s* bop)[63:32]); * } * Rd = concat(rest, resb); * x=0 * ~~~ * * \param [in] t unsigned long long type of value stored in t * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DKMMSB(unsigned long long t, unsigned long long a, unsigned long long b) { __ASM volatile("dkmmsb %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for DKMMSB ===== */ /* ===== Inline Function Start for DKMMSB.u ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DKMMSB.u (64-bit MSW 32x32 Unsigned Multiply and Saturating Sub) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DKMMSB.u Rd, Rs1, Rs2 * # Rd, Rs1, Rs2 are all even/odd pair of registers * ~~~ * * **Purpose**:\n * Do MSW 32x32 element unsigned multiplications and saturating subtraction simultaneously. The results are written into Rd. 
* * **Description**:\n * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2 * and subtracts the most significant 32-bit multiplication results from the signed 32-bit elements of * Rd. If the subtraction result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the * range and the OV bit is set to 1. The results after saturation are written to Rd. The .u form of the * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by * adding a 1 to bit 31 of the results. * * **Operations**:\n * ~~~ * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) { * res = sat.q31(dop - (aop u* bop)[63:32]); * } * Rd = concat(rest, resb); * x=0 * ~~~ * * \param [in] t unsigned long long type of value stored in t * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DKMMSB_U(unsigned long long t, unsigned long long a, unsigned long long b) { __ASM volatile("dkmmsb.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for DKMMSB.u ===== */ /* ===== Inline Function Start for DKMADA ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DKMADA (Saturating Signed Multiply Two Halfs and Two Adds) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * DKMADA Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two 16x16 with 32-bit signed double addition simultaneously. The results are written into Rd. 
* * **Description**:\n * It multiplies the bottom 16-bit content of 32-bit elements in Rs1 with * the bottom 16-bit content of 32-bit elements in Rs2 and then adds the result to the result of * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit * elements in Rs2. * * **Operations**:\n * ~~~ * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom * * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) { * mul1 = aop.H[1] s* bop.H[1]; * mul2 = aop.H[0] s* bop.H[0]; * res = sat.q31(dop + mul1 + mul2); * } * Rd = concat(rest, resb); * x=0 * ~~~ * * \param [in] t unsigned long long type of value stored in t * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DKMADA(unsigned long long t, unsigned long long a, unsigned long long b) { __ASM volatile("dkmada %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for DKMADA ===== */ /* ===== Inline Function Start for DKMAXDA ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DKMAXDA (Two Cross 16x16 with 32-bit Signed Double Add) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * DKMAXDA Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two cross 16x16 with 32-bit signed double addition simultaneously. The results are written into Rd. * * **Description**:\n * It multiplies the top 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-bit * elements in Rs2 and then adds the result to the result of multiplying the bottom 16-bit content of * 32-bit elements in Rs1 with the top 16-bit content of 32-bit elements in elements in Rs2. 
* * **Operations**:\n * ~~~ * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom * * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) { * mul1 = aop.H[1] s* bop.H[0]; * mul2 = aop.H[0] s* bop.H[1]; * res = sat.q31(dop + mul1 + mul2); * } * Rd = concat(rest, resb); * x=0 * ~~~ * * \param [in] t unsigned long long type of value stored in t * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DKMAXDA(unsigned long long t, unsigned long long a, unsigned long long b) { __ASM volatile("dkmaxda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for DKMAXDA ===== */ /* ===== Inline Function Start for DKMADS ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DKMADS (Two 16x16 with 32-bit Signed Add and Sub) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * DKMADS Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two 16x16 with 32-bit signed addition and subtraction simultaneously. The results are written into Rd. * * **Description**:\n * It multiplies the bottom 16-bit content of 32-bit elements in Rs1 with * the bottom 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit * elements in Rs2. 
* * **Operations**:\n * ~~~ * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom * * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) { * mul1 = aop.H[1] s* bop.H[1]; * mul2 = aop.H[0] s* bop.H[0]; * res = sat.q31(dop + mul1 - mul2); * } * Rd = concat(rest, resb); * x=0 * ~~~ * * \param [in] t unsigned long long type of value stored in t * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DKMADS(unsigned long long t, unsigned long long a, unsigned long long b) { __ASM volatile("dkmads %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for DKMADS ===== */ /* ===== Inline Function Start for DKMADRS ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DKMADRS (Two 16x16 with 32-bit Signed Add and Reversed Sub) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * DKMADRS Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two 16x16 with 32-bit signed addition and revered subtraction simultaneously. The results are written into Rd. 
* * **Description**:\n * it multiplies the top 16-bit content of 32-bit elements in Rs1 with the * top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of * multiplying the bottom 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32- * bit elements in Rs2 * * **Operations**:\n * ~~~ * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom * * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) { * mul1 = aop.H[1] s* bop.H[1]; * mul2 = aop.H[0] s* bop.H[0]; * res = sat.q31(dop - mul1 + mul2); * } * Rd = concat(rest, resb); * x=0 * ~~~ * * \param [in] t unsigned long long type of value stored in t * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DKMADRS(unsigned long long t, unsigned long long a, unsigned long long b) { __ASM volatile("dkmadrs %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for DKMADRS ===== */ /* ===== Inline Function Start for DKMAXDS ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DKMAXDS (Saturating Signed Crossed Multiply Two Halfs & Subtract & Add) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * DKMAXDS Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two cross 16x16 with 32-bit signed addition and subtraction simultaneously. The results are written into Rd. * * **Description**:\n * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to * the corresponding 32-bit elements in a third register. The addition result may be saturated. 
* * **Operations**:\n * ~~~ * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom * * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) { * mul1 = aop.H[1] s* bop.H[0]; * mul2 = aop.H[0] s* bop.H[1]; * res = sat.q31(dop + mul1 - mul2); * } * Rd = concat(rest, resb); * x=0 * ~~~ * * \param [in] t unsigned long long type of value stored in t * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DKMAXDS(unsigned long long t, unsigned long long a, unsigned long long b) { __ASM volatile("dkmaxds %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for DKMAXDS ===== */ /* ===== Inline Function Start for DKMSDA ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DKMSDA (Two 16x16 with 32-bit Signed Double Sub) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * DKMSDA Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two 16x16 with 32-bit signed double subtraction simultaneously. The results are written into Rd. * * **Description**:\n * it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 * with the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2. 
* * **Operations**:\n * ~~~ * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom * * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) { * mul1 = aop.H[1] s* bop.H[0]; * mul2 = aop.H[0] s* bop.H[1]; * res = sat.q31(dop - mul1 - mul2); * } * Rd = concat(rest, resb); * x=0 * ~~~ * * \param [in] t unsigned long long type of value stored in t * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DKMSDA(unsigned long long t, unsigned long long a, unsigned long long b) { __ASM volatile("dkmsda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for DKMSDA ===== */ /* ===== Inline Function Start for DKMSXDA ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DKMSXDA (Two Cross 16x16 with 32-bit Signed Double Sub) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * DKMSXDA Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two cross 16x16 with 32-bit signed double subtraction simultaneously. The results are written into Rd. * * **Description**:\n * It multiplies the bottom 16-bit content of the 32-bit elements of Rs1 * with the top 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of the * 32-bit elements of Rs1 with the bottom 16-bit content of the 32-bit elements of Rs2. 
* * **Operations**:\n * ~~~ * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom * * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) { * mul1 = aop.H[1] s* bop.H[0]; * mul2 = aop.H[0] s* bop.H[1]; * res = sat.q31(dop - mul1 - mul2); * } * Rd = concat(rest, resb); * x=0 * ~~~ * * \param [in] t unsigned long long type of value stored in t * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DKMSXDA(unsigned long long t, unsigned long long a, unsigned long long b) { __ASM volatile("dkmsxda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for DKMSXDA ===== */ /* ===== Inline Function Start for DSMAQA ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DSMAQA (Four Signed 8x8 with 32-bit Signed Add) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * DSMAQA Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do four signed 8x8 with 32-bit signed addition simultaneously. The results are written into Rd. * * **Description**:\n * This instruction multiplies the four signed 8-bit elements of 32-bit chunks of Rs1 with the four * signed 8-bit elements of 32-bit chunks of Rs2 and then adds the four results together with the signed * content of the corresponding 32-bit chunks of Rd. The final results are written back to the * corresponding 32-bit chunks in Rd. 
* * **Operations**:\n * ~~~ * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom * * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) { * m0 = aop.B[0] s* bop.B[0]; * m1 = aop.B[1] s* bop.B[1]; * m2 = aop.B[2] s* bop.B[2]; * m3 = aop.B[3] s* bop.B[3]; * res = dop + m0 + m1 + m2 + m3; * } * Rd = concat(rest, resb); * x=0 * ~~~ * * \param [in] t unsigned long long type of value stored in t * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DSMAQA(unsigned long long t, unsigned long long a, unsigned long long b) { __ASM volatile("dsmaqa %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for DSMAQA ===== */ /* ===== Inline Function Start for DSMAQA.SU ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DSMAQA.SU (Four Signed 8 x Unsigned 8 with 32-bit Signed Add) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * DSMAQA.SU Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do four Signed 8 x Unsigned 8 with 32-bit unsigned addition simultaneously. The results are written into Rd. * * **Description**:\n * This instruction multiplies the four unsigned 8-bit elements of 32-bit chunks of Rs1 with the four * signed 8-bit elements of 32-bit chunks of Rs2 and then adds the four results together with the unsigned * content of the corresponding 32-bit chunks of Rd. The final results are written back to the * corresponding 32-bit chunks in Rd. 
* * **Operations**:\n * ~~~ * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom * * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) { * m0 = aop.B[0] su* bop.B[0]; * m1 = aop.B[1] su* bop.B[1]; * m2 = aop.B[2] su* bop.B[2]; * m3 = aop.B[3] su* bop.B[3]; * res = dop + m0 + m1 + m2 + m3; * } * Rd = concat(rest, resb); * x=0 * ~~~ * * \param [in] t unsigned long long type of value stored in t * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DSMAQA_SU(unsigned long long t, unsigned long long a, unsigned long long b) { __ASM volatile("dsmaqa.su %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for DSMAQA.SU ===== */ /* ===== Inline Function Start for DUMAQA ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DUMAQA (Four Unsigned 8x8 with 32-bit Unsigned Add) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * DUMAQA Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do four unsigned 8x8 with 32-bit unsigned addition simultaneously. The results are written into Rd. * * **Description**:\n * This instruction multiplies the four unsigned 8-bit elements of 32-bit chunks of Rs1 with the four * unsigned 8-bit elements of 32-bit chunks of Rs2 and then adds the four results together with the * unsigned content of the corresponding 32-bit chunks of Rd. The final results are written back to the * corresponding 32-bit chunks in Rd. 
* * **Operations**:\n * ~~~ * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom * * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) { * m0 = aop.B[0] su* bop.B[0]; * m1 = aop.B[1] su* bop.B[1]; * m2 = aop.B[2] su* bop.B[2]; * m3 = aop.B[3] su* bop.B[3]; * res = dop + m0 + m1 + m2 + m3; * } * Rd = concat(rest, resb); * x=0 * ~~~ * * \param [in] t unsigned long long type of value stored in t * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DUMAQA(unsigned long long t, unsigned long long a, unsigned long long b) { __ASM volatile("dumaqa %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for DUMAQA ===== */ /* ===== Inline Function Start for DKMDA32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DKMDA32 (Two Signed 32x32 with 64-bit Saturation Add) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * DKMDA32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two signed 32x32 add the signed multiplication results with Q63 saturation. The results are written into Rd. * * **Description**:\n * For the `KMDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1 * with the top 32-bit element of Rs2. 
* * **Operations**:\n * ~~~ * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom * t0 = op1b s* op2b; * t1 = op1t s* op2t; * Rd = concat(rest, resb); * x=0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_DKMDA32(unsigned long long a, unsigned long long b) { long long result; __ASM volatile("dkmda32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DKMDA32 ===== */ /* ===== Inline Function Start for DKMXDA32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DKMXDA32 (Two Cross Signed 32x32 with 64-bit Saturation Add) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * DKMXDA32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two cross signed 32x32 and add the signed multiplication results with Q63 saturation. The results are written into Rd. * * **Description**:\n * It multiplies the bottom 32-bit element of Rs1 with the top 32-bit * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1 * with the bottom 32-bit element of Rs2. 
* * **Operations**:\n * ~~~ * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom * t01 = op1b s* op2t; * t10 = op1t s* op2b; * Rd = sat.q63(t01 + t10); * x=0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_DKMXDA32(unsigned long long a, unsigned long long b) { long long result; __ASM volatile("dkmxda32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DKMXDA32 ===== */ /* ===== Inline Function Start for DKMADA32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DKMADA32 (Two Signed 32x32 with 64-bit Saturation Add) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * DKMADA32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two signed 32x32 and add the signed multiplication results and a third register with Q63 saturation. The results are written into Rd. * * **Description**:\n * It multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1 * with the top 32-bit element of Rs2. 
* * **Operations**:\n * ~~~ * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom * t01 = op1b s* op2b; * t10 = op1t s* op2t; * Rd = sat.q63(t01 + t10); * x=0 * ~~~ * * \param [in] t long long type of value stored in t * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_DKMADA32(long long t, unsigned long long a, unsigned long long b) { __ASM volatile("dkmada32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for DKMADA32 ===== */ /* ===== Inline Function Start for DKMAXDA32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DKMAXDA32 (Two Cross Signed 32x32 with 64-bit Saturation Add) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * DKMAXDA32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two cross signed 32x32 and add the signed multiplication results and a third register with Q63 saturation. The * results are written into Rd. * * **Description**:\n * It multiplies the top 32-bit element in Rs1 with the bottom 32-bit * element in Rs2 and then adds the result to the result of multiplying the bottom 32-bit element in Rs1 * with the top 32-bit element in Rs2. 
* * **Operations**:\n * ~~~ * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom * t01 = op1b s* op2t; * t10 = op1t s* op2b; * Rd = sat.q63(Rd + t01 + t10); * x=0 * ~~~ * * \param [in] t long long type of value stored in t * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_DKMAXDA32(long long t, unsigned long long a, unsigned long long b) { __ASM volatile("dkmaxda32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for DKMAXDA32 ===== */ /* ===== Inline Function Start for DKMADS32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DKMADS32 (Two Signed 32x32 with 64-bit Saturation Add and Sub) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * DKMADS32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two signed 32x32 and add the top signed multiplication results and subtraction bottom signed multiplication results * and add a third register with Q63 saturation. The results are written into Rd. * * **Description**:\n * It multiplies the top 32-bit element in Rs1 with the bottom 32-bit * element in Rs2 and then subtracts the result to the result of multiplying the top 32-bit element in Rs1 * with the top 32-bit element in Rs2. 
* * **Operations**:\n * ~~~ * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom * * t0 = op1b s* op2b; * t1 = op1t s* op2t; * Rd = sat.q63(Rd - t0 + t1); * x=0 * ~~~ * * \param [in] t long long type of value stored in t * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_DKMADS32(long long t, unsigned long long a, unsigned long long b) { __ASM volatile("dkmads32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for DKMADS32 ===== */ /* ===== Inline Function Start for DKMADRS32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DKMADRS32 (Two Signed 32x32 with 64-bit Saturation Revered Add and Sub) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * DKMADRS32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two signed 32x32 and add the signed multiplication results and a third register with Q63 saturation. The results * are written into Rd.Do two signed 32x32 and subtraction the top signed multiplication results and add bottom signed * multiplication results and add a third register with Q63 saturation. The results are written into Rd. * * **Description**:\n * It multiplies the top 32-bit element in Rs1 with the top 32-bit * element in Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit * element in Rs1 with the bottom 32-bit element in Rs2. 
* * **Operations**:\n * ~~~ * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom * t0 = op1b s* op2b; * t1 = op1t s* op2t; * Rd = sat.q63(Rd + t0 - t1); * x=0 * ~~~ * * \param [in] t long long type of value stored in t * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_DKMADRS32(long long t, unsigned long long a, unsigned long long b) { __ASM volatile("dkmadrs32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for DKMADRS32 ===== */ /* ===== Inline Function Start for DKMAXDS32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DKMAXDS32 (Two Cross Signed 32x32 with 64-bit Saturation Add and Sub) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * DKMAXDS32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two signed 32x32 and add the top signed multiplication results and subtraction bottom signed multiplication results * and add a third register with Q63 saturation. The results are written into Rd. * * **Description**:\n * It multiplies the bottom 32-bit element in Rs1 with the top 32-bit * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in * Rs1 with the bottom 32-bit element in Rs2. 
* * **Operations**:\n * ~~~ * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom * * t01 = op1b s* op2t; * t10 = op1t s* op2b; * Rd = sat.q63(Rd - t01 + t10); * x=0 * ~~~ * * \param [in] t long long type of value stored in t * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_DKMAXDS32(long long t, unsigned long long a, unsigned long long b) { __ASM volatile("dkmaxds32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for DKMAXDS32 ===== */ /* ===== Inline Function Start for DKMSDA32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DKMSDA32 (Two Signed 32x32 with 64-bit Saturation Sub) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * DKMSDA32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two signed 32x32 and subtraction the top signed multiplication results and subtraction bottom signed multiplication * results and add a third register with Q63 saturation. The results are written into Rd. * * **Description**:\n * It multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit * element of Rs2 and multiplies the top 32-bit element of Rs1 with the top 32-bit element of Rs2. 
* * **Operations**:\n * ~~~ * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom * * t0 = op1b s* op2b; * t1 = op1t s* op2t; * Rd = sat.q63(Rd - t0 - t1); * x=0 * ~~~ * * \param [in] t long long type of value stored in t * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_DKMSDA32(long long t, unsigned long long a, unsigned long long b) { __ASM volatile("dkmsda32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for DKMSDA32 ===== */ /* ===== Inline Function Start for DKMSXDA32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DKMSXDA32 (Two Cross Signed 32x32 with 64-bit Saturation Sub) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * DKMSXDA32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two cross signed 32x32 and subtraction the top signed multiplication results and subtraction bottom signed multiplication * results and add a third register with Q63 saturation. The results are written into Rd. * * **Description**:\n * It multiplies the bottom 32-bit element of Rs1 with the top 32-bit * element of Rs2 and multiplies the top 32-bit element of Rs1 with the bottom 32-bit element of Rs2. 
* * **Operations**:\n * ~~~ * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom * * t0 = op1b s* op2t; * t1 = op1t s* op2b; * Rd = sat.q63(Rd - t0 - t1); * x=0 * ~~~ * * \param [in] t long long type of value stored in t * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_DKMSXDA32(long long t, unsigned long long a, unsigned long long b) { __ASM volatile("dkmsxda32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for DKMSXDA32 ===== */ /* ===== Inline Function Start for DSMDS32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DSMDS32 (Two Signed 32x32 with 64-bit Sub) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * DSMDS32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two signed 32x32 and add the top signed multiplication results and subtraction bottom signed multiplication. The * results are written into Rd. * * **Description**:\n * It multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of * Rs1 with the top 32-bit element of Rs2. 
* * **Operations**:\n * ~~~ * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom * * t0 = op1b s* op2t; * t1 = op1t s* op2b; * Rd = t1 - t0; * x=0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_DSMDS32(unsigned long long a, unsigned long long b) { long long result; __ASM volatile("dsmds32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DSMDS32 ===== */ /* ===== Inline Function Start for DSMDRS32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DSMDRS32 (Two Signed 32x32 with 64-bit Revered Sub) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * DSMDRS32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two signed 32x32 and subtraction the top signed multiplication results and add bottom signed multiplication. The results are written into Rd * * **Description**:\n * It multiplies the top 32-bit element of Rs1 with the top 32-bit * element of Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit * element of Rs1 with the bottom 32-bit element of Rs2. 
* * **Operations**:\n * ~~~ * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom * * t0 = op1b s* op2b; * t1 = op1t s* op2t; * Rd = t1 - t0; * x=0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_DSMDRS32(unsigned long long a, unsigned long long b) { long long result; __ASM volatile("dsmdrs32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DSMDRS32 ===== */ /* ===== Inline Function Start for DSMXDS32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DSMXDS32 (Two Cross Signed 32x32 with 64-bit Sub) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * DSMXDS32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two cross signed 32x32 and add the top signed multiplication results and subtraction bottom signed multiplication. * The results are written into Rd. * * **Description**:\n * It multiplies the bottom 32-bit element of Rs1 with the top 32-bit * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of * Rs1 with the bottom 32-bit element of Rs2. 
* * **Operations**:\n * ~~~ * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom * * t01 = op1b s* op2t; * t10 = op1t s* op2b; * Rd = t1 - t0; * x=0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_DSMXDS32(unsigned long long a, unsigned long long b) { long long result; __ASM volatile("dsmxds32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DSMXDS32 ===== */ /* ===== Inline Function Start for DSMALDA ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DSMALDA (Four Signed 16x16 with 64-bit Add) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * DSMALDA Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do four signed 16x16 and add signed multiplication results and a third register. The results are written into Rd. * * **Description**:\n * It multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit * content of Rs2 and then adds the result to the result of multiplying the top 16-bit content of Rs1 with * the top 16-bit content of Rs2 with unlimited precision * * **Operations**:\n * ~~~ * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom * * m0 = op1b.H[0] s* op2b.H[0]; * m1 = op1b.H[1] s* op2b.H[1]; * m2 = op1t.H[0] s* op2t.H[0]; * m3 = op1t.H[1] s* op2t.H[1]; * * Rd = Rd + m0 + m1 + m2 + m3; * x=0 * ~~~ * * \param [in] t long long type of value stored in t * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_DSMALDA(long long t, unsigned long long a, unsigned long long b) { __ASM volatile("dsmalda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for DSMALDA ===== */ /* ===== Inline 
Function Start for DSMALXDA ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DSMALXDA (Four Cross Signed 16x16 with 64-bit Add)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DSMALXDA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do four cross signed 16x16 and add signed multiplication results and a third register. The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the top 16-bit content of Rs1 with the bottom 16-bit
 * content of Rs2 and then adds the result to the result of multiplying the bottom 16-bit content of Rs1
 * with the top 16-bit content of Rs2 with unlimited precision.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 *
 * m0 = op1b.H[0] s* op2b.H[1];
 * m1 = op1b.H[1] s* op2b.H[0];
 * m2 = op1t.H[0] s* op2t.H[1];
 * m3 = op1t.H[1] s* op2t.H[0];
 *
 * Rd = Rd + m0 + m1 + m2 + m3;
 * x=0
 * ~~~
 *
 * \param [in]  t    long long value used as the 64-bit accumulator input (Rd)
 * \param [in]  a    unsigned long long value passed as the first source operand (Rs1)
 * \param [in]  b    unsigned long long value passed as the second source operand (Rs2)
 * \return           long long value read back from Rd
 */
__STATIC_FORCEINLINE long long __RV_DSMALXDA(long long t, unsigned long long a, unsigned long long b)
{
    /* "+r": Rd is read-modify-write, so it is seeded with the accumulator t. */
    long long rd = t;

    __ASM volatile("dsmalxda %0, %1, %2" : "+r"(rd) : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DSMALXDA ===== */

/* ===== Inline Function Start for DSMALDS ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DSMALDS (Four Signed 16x16 with 64-bit Add and Sub)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DSMALDS Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do four signed 16x16 and add and subtraction signed multiplication results and a third register. The results are written into Rd.
* * **Description**:\n * It multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of * Rs1 with the top 16-bit content of Rs2. * * **Operations**:\n * ~~~ * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom * * m0 = op1b.H[1] s* op2b.H[1]; * m1 = op1b.H[0] s* op2b.H[0]; * m2 = op1t.H[1] s* op2t.H[1]; * m3 = op1t.H[0] s* op2t.H[0]; * * Rd = Rd + m0 - m1 + m2 - m3; * x=0 * ~~~ * * \param [in] t long long type of value stored in t * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_DSMALDS(long long t, unsigned long long a, unsigned long long b) { __ASM volatile("dsmalds %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for DSMALDS ===== */ /* ===== Inline Function Start for DSMALDRS ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DSMALDRS (Four Signed 16x16 with 64-bit Add and Revered Sub) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * DSMALDRS Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two signed 16x16 and add and revered subtraction signed multiplication results and a third register. The results are written into Rd. * * **Description**:\n * It multiplies the top 16-bit content of Rs1 with the top 16-bit content * of Rs2 and then subtracts the result from the result of multiplying the bottom 16-bit content of Rs1 * with the bottom 16-bit content of Rs2. 
* * **Operations**:\n * ~~~ * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom * * m0 = op1b.H[0] s* op2b.H[0]; * m1 = op1b.H[1] s* op2b.H[1]; * m2 = op1t.H[0] s* op2t.H[0]; * m3 = op1t.H[1] s* op2t.H[1]; * * Rd = Rd + m0 - m1 + m2 - m3; * x=0 * ~~~ * * \param [in] t long long type of value stored in t * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_DSMALDRS(long long t, unsigned long long a, unsigned long long b) { __ASM volatile("dsmaldrs %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for DSMALDRS ===== */ /* ===== Inline Function Start for DSMALXDS ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DSMALXDS (Four Cross Signed 16x16 with 64-bit Add and Sub) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * DSMALXDS Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do four cross signed 16x16 and add and subtraction signed multiplication results and a third register. The results are written into Rd. * * **Description**:\n * It multiplies the bottom 16-bit content of Rs1 with the top 16-bit * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of * Rs1 with the bottom 16-bit content of Rs2. 
* * **Operations**:\n * ~~~ * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom * * m0 = op1b.H[1] s* op2b.H[0]; * m1 = op1b.H[0] s* op2b.H[1]; * m2 = op1t.H[1] s* op2t.H[0]; * m3 = op1t.H[0] s* op2t.H[1]; * * Rd = Rd + m0 - m1 + m2 - m3; * x=0 * ~~~ * * \param [in] t long long type of value stored in t * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_DSMALXDS(long long t, unsigned long long a, unsigned long long b) { __ASM volatile("dsmalxds %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for DSMALXDS ===== */ /* ===== Inline Function Start for DSMSLDA ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DSMSLDA (Four Signed 16x16 with 64-bit Sub) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * DSMSLDA Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do four signed 16x16 and subtraction signed multiplication results and add a third register. The results are written into Rd. * * **Description**:\n * It multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit * content Rs2 and multiplies the top 16-bit content of Rs1 with the top 16-bit content of Rs2. 
* * **Operations**:\n * ~~~ * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom * * m0 = op1b.H[0] s* op2b.H[0]; * m1 = op1b.H[1] s* op2b.H[1]; * m2 = op1t.H[0] s* op2t.H[0]; * m3 = op1t.H[1] s* op2t.H[1]; * * Rd = Rd - m0 - m1 - m2 - m3; * x=0 * ~~~ * * \param [in] t long long type of value stored in t * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_DSMSLDA(long long t, unsigned long long a, unsigned long long b) { __ASM volatile("dsmslda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for DSMSLDA ===== */ /* ===== Inline Function Start for DSMSLXDA ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DSMSLXDA (Four Cross Signed 16x16 with 64-bit Sub) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * DSMSLXDA Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do four signed 16x16 and subtraction signed multiplication results and add a third register. The results are written into Rd. * * **Description**:\n * It multiplies the top 16-bit content of Rs1 with the bottom 16-bit * content of Rs2 and multiplies the bottom 16-bit content of Rs1 with the top 16-bit content of Rs2. 
* * **Operations**:\n * ~~~ * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom * * m0 = op1b.H[0] s* op2b.H[1]; * m1 = op1b.H[1] s* op2b.H[0]; * m2 = op1t.H[0] s* op2t.H[1]; * m3 = op1t.H[1] s* op2t.H[0]; * * Rd = Rd - m0 - m1 - m2 - m3; * x=0 * ~~~ * * \param [in] t long long type of value stored in t * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_DSMSLXDA(long long t, unsigned long long a, unsigned long long b) { __ASM volatile("dsmslxda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for DSMSLXDA ===== */ /* ===== Inline Function Start for DDSMAQA ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DDSMAQA (Eight Signed 8x8 with 64-bit Add) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * DDSMAQA Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do eight signed 8x8 and add signed multiplication results and a third register. The results are written into Rd. * * **Description**:\n * Do eight signed 8-bit multiplications from eight 8-bit chunks of two registers; and then adds * the eight 16-bit results and the content of 64-bit chunks of a third register. 
* * **Operations**:\n * ~~~ * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom * * m0 = op1b.B[0] s* op2b.B[0]; * m1 = op1b.B[1] s* op2b.B[1]; * m2 = op1b.B[2] s* op2b.B[2]; * m3 = op1b.B[3] s* op2b.B[3]; * m4 = op1t.B[0] s* op2t.B[0]; * m5 = op1t.B[1] s* op2t.B[1]; * m6 = op1t.B[2] s* op2t.B[2]; * m7 = op1t.B[3] s* op2t.B[3]; * * s0 = m0 + m1 + m2 + m3; * s1 = m4 + m5 + m6 + m7; * Rd = Rd + s0 + s1; * x=0 * ~~~ * * \param [in] t long long type of value stored in t * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_DDSMAQA(long long t, unsigned long long a, unsigned long long b) { __ASM volatile("ddsmaqa %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for DDSMAQA ===== */ /* ===== Inline Function Start for DDSMAQA.SU ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DDSMAQA.SU (Eight Signed 8 x Unsigned 8 with 64-bit Add) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * DDSMAQA.SU Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do eight signed 8 x unsigned 8 and add signed multiplication results and a third register. The results are written into Rd. * * **Description**:\n * Do eight signed 8 x unsigned 8 and add signed multiplication results and a third register; and then adds * the eight 16-bit results and the content of 64-bit chunks of a third register. 
* * **Operations**:\n * ~~~ * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom * * m0 = op1b.B[0] su* op2b.B[0]; * m1 = op1b.B[1] su* op2b.B[1]; * m2 = op1b.B[2] su* op2b.B[2]; * m3 = op1b.B[3] su* op2b.B[3]; * m4 = op1t.B[0] su* op2t.B[0]; * m5 = op1t.B[1] su* op2t.B[1]; * m6 = op1t.B[2] su* op2t.B[2]; * m7 = op1t.B[3] su* op2t.B[3]; * * s0 = m0 + m1 + m2 + m3; * s1 = m4 + m5 + m6 + m7; * Rd = Rd + s0 + s1; * x=0 * ~~~ * * \param [in] t long long type of value stored in t * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_DDSMAQA_SU(long long t, unsigned long long a, unsigned long long b) { __ASM volatile("ddsmaqa.su %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for DDSMAQA.SU ===== */ /* ===== Inline Function Start for DDUMAQA ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DDUMAQA (Eight Unsigned 8x8 with 64-bit Unsigned Add) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * DDUMAQA Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do eight unsigned 8x8 and add unsigned multiplication results and a third register. The results are written into Rd. * * **Description**:\n * Do eight unsigned 8x8 and add unsigned multiplication results and a third register; and then adds * the eight 16-bit results and the content of 64-bit chunks of a third register. 
* * **Operations**:\n * ~~~ * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom * * m0 = op1b.B[0] u* op2b.B[0]; * m1 = op1b.B[1] u* op2b.B[1]; * m2 = op1b.B[2] u* op2b.B[2]; * m3 = op1b.B[3] u* op2b.B[3]; * m4 = op1t.B[0] u* op2t.B[0]; * m5 = op1t.B[1] u* op2t.B[1]; * m6 = op1t.B[2] u* op2t.B[2]; * m7 = op1t.B[3] u* op2t.B[3]; * * s0 = m0 + m1 + m2 + m3; * s1 = m4 + m5 + m6 + m7; * Rd = Rd + s0 + s1; * x=0 * ~~~ * * \param [in] t long long type of value stored in t * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_DDUMAQA(long long t, unsigned long long a, unsigned long long b) { __ASM volatile("ddumaqa %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for DDUMAQA ===== */ /* ===== Inline Function Start for DSMA32.u ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DSMA32.u (64-bit SIMD 32-bit Signed Multiply Addition With Rounding and Clip) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * DSMA32.u Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two signed 32x32 and add signed multiplication results with Rounding, then right shift 32-bit and clip q63 to q31. * The result is written to Rd. * * **Description**:\n * For the `DSMA32.u` instruction, multiply the top 32-bit Q31 content of 64-bit chunks in Rs1 with the top 32-bit Q31 * content of 64-bit chunks in Rs2. At the same time, multiply the bottom 32-bit Q31 content of 64-bit chunks in Rs1 with * the bottom 32-bit Q31 content of 64-bit chunks in Rs2. * Then, do the addtion for the results above and perform the addtional rounding operations, and then move the data to the right * by 32-bit, and clip the 64-bit data into 32-bit.The result is written to Rd. 
* * **Operations**:\n * ~~~ * Rd = (q31_t)((Rs1.W[x] s* Rs2.W[x] + Rs1.W[x + 1] s* Rs2.W[x + 1] + 0x80000000LL) s>> 32); * x=0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_DSMA32_U(unsigned long long a, unsigned long long b) { long result; __ASM volatile("dsma32.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DSMA32.u ===== */ /* ===== Inline Function Start for DSMXS32.u ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DSMXS32.u (64-bit SIMD 32-bit Signed Multiply Cross Subtraction With Rounding and Clip) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * DSMXS32.u Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two cross signed 32x32 and sub signed multiplication results with Rounding, then right shift 32-bit and clip q63 to * q31. The result is written to Rd. * * **Description**:\n * For the `DSMXS32.u` instruction, multiply the top 32-bit Q31 content of 64-bit chunks in Rs1 with the bottom 32-bit * Q31 content of 64-bit chunks in Rs2. At the same time, multiply the bottom 32-bit Q31 content of 64-bit chunks in Rs1 * with the top 32-bit Q31 content of 64-bit chunks in Rs2. * Then, do the subtraction for the results above and perform the addtional rounding operations, and then move the data to the right by * 32-bit, and clip the 64-bit data into 32-bit.The result is written to Rd. 
* * **Operations**:\n * ~~~ * Rd = (q31_t)((Rs1.W[x + 1] s* Rs2.W[x] - Rs1.W[x] s* Rs2.W[x + 1] + 0x80000000LL) s>> 32); * x=0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_DSMXS32_U(unsigned long long a, unsigned long long b) { long result; __ASM volatile("dsmxs32.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DSMXS32.u ===== */ /* ===== Inline Function Start for DSMXA32.u ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DSMXA32.u (64-bit SIMD 32-bit Signed Cross Multiply Addition with Rounding and Clip) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * DSMXA32.u Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two cross signed 32x32 and add signed multiplication results with Rounding, then right shift 32-bit and clip q63 to * q31. The result is written to Rd. * * **Description**:\n * For the `DSMXA32.u` instruction,multiply the top 32-bit Q31 content of 64-bit chunks in Rs1 with the bottom 32-bit Q31 * content of 64-bit chunks in Rs2. At the same time, multiply the bottom 32-bit Q31 content of 64-bit chunks in Rs1 with * the top 32-bit Q31 content of 64-bit chunks in Rs2. * Then, do the addtion for the results above and perform the addtional rounding operations, and then move the data to the right * by 32-bit, and clip the 64-bit data into 32-bit.The result is written to Rd. 
* * **Operations**:\n * ~~~ * Rd = (q31_t)((Rs1.W[x + 1] s* Rs2.W[x] + Rs1.W[x] s* Rs2.W[x + 1] + 0x80000000LL) s>> 32); * x=0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_DSMXA32_U(unsigned long long a, unsigned long long b) { long result; __ASM volatile("dsmxa32.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DSMXA32.u ===== */ /* ===== Inline Function Start for DSMS32.u ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DSMS32.u (64-bit SIMD 32-bit Signed Multiply Subtraction with Rounding and Clip) * \details * **Type**: DSP * * **Syntax**:\n * ~~~ * DSMS32.u Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two signed 32x32 and sub signed multiplication results with Rounding, then right shift 32-bit and clip q63 to q31. The * result is written to Rd. * * **Description**:\n * For the `DSMS32.u` instruction, multiply the bottom 32-bit Q31 content of 64-bit chunks in Rs1 with the bottom 32-bit * Q31 content of 64-bit chunks in Rs2. At the same time, multiply the top 32-bit Q31 content of 64-bit chunks in Rs1 with * the top 32-bit Q31 content of 64-bit chunks in Rs2. * Then, do the subtraction for the results above and perform the addtional rounding operations, and then move the data to the right by * 32-bit, and clip the 64-bit data into 32-bit.The result is written to Rd. 
* * **Operations**:\n * ~~~ * Rd = (q31_t)((Rs1.W[x] s* Rs2.W[x] - Rs1.W[x + 1] s* Rs2.W[x + 1] + 0x80000000LL) s>> 32); * x=0 * ~~~ * * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_DSMS32_U(unsigned long long a, unsigned long long b) { long result; __ASM volatile("dsms32.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); return result; } /* ===== Inline Function End for DSMS32.u ===== */ /* ===== Inline Function Start for DSMADA16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DSMADA16 (Signed Multiply Two Halfs and Two Adds 32-bit) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DSMADA16 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two signed 16-bit multiplications of two 32-bit registers; and then adds the 32-bit results and the 32-bit value of an * even/odd pair of registers together. * * DSMADA16: rt pair+ top*top + bottom*bottom * * **Description**:\n * This instruction multiplies the per 16-bit content of the 32-bit elements of Rs1 with the corresponding 16-bit content of * the 32-bit elements of Rs2. The result is added to the 32-bit value of an even/odd pair of registers specified by Rd(4,1). * The 32-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and Rs2, and the 32-bit value of the * register-pair are treated as signed integers. 
* * **Operations**:\n * ~~~ * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]); * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]); * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]); * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]); * Rd.W = Rd.W + SE32(Mres0[0][31:0]) + SE32(Mres1[0][31:0]) + SE32(Mres0[1][31:0]) + SE32(Mres1[1][31:0]); * ~~~ * * \param [in] t long long type of value stored in t * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_DSMADA16(long long t, unsigned long long a, unsigned long long b) { __ASM volatile("dsmada16 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return (long)t; } /* ===== Inline Function End for DSMADA16 ===== */ /* ===== Inline Function Start for DSMAXDA16 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DSMAXDA16 (Signed Crossed Multiply Two Halfs and Two Adds 32-bit) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DSMAXDA16 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two signed 16-bit multiplications of two 32-bit registers; and then adds the 32-bit results and the 32-bit value of an * even/odd pair of registers together. * * DSMAXDA: rt pair+ top*bottom + bottom*top (all 32-bit elements) * * **Description**:\n * This instruction crossly multiplies the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of the 32-bit * elements of Rs2 and then adds the result to the result of multiplying the bottom 16-bit content of the 32-bit elements of * Rs1 with the top 16-bit content of the 32-bit elements of Rs2 with unlimited precision. The result is added to the 64-bit * value of an even/odd pair of registers specified by Rd(4,1).The 64-bit addition result is clipped to 32-bit result. 
* * **Operations**:\n * ~~~ * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[1]); * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]); * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[1]); * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]); * Rd.W = Rd.W + SE32(Mres0[0][31:0]) + SE32(Mres1[0][31:0]) + SE32(Mres0[1][31:0]) + SE32(Mres1[1][31:0]); * ~~~ * * \param [in] t long long type of value stored in t * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_DSMAXDA16(long long t, unsigned long long a, unsigned long long b) { __ASM volatile("dsmaxda16 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return (long)t; } /* ===== Inline Function End for DSMAXDA16 ===== */ /* ===== Inline Function Start for DKSMS32.u ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DKSMS32.u (Two Signed Multiply Shift-clip and Saturation with Rounding) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DKSMS32.u Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Computes saturated multiplication of two pairs of q31 type with shifted rounding. * * **Description**:\n * Compute the multiplication of Rs1 and Rs2 of type q31_t, intercept [47:16] for the resulting 64-bit product * to get the 32-bit number, then add 1 to it to do rounding, and finally saturate the result after rounding. 
* * **Operations**:\n * ~~~ * Mres[x][63:0] = Rs1.W[x] s* Rs2.W[x]; * Round[x][32:0] = Mres[x][47:15] + 1; * Rd.W[x] = sat.31(Rd.W[x] + Round[x][32:1]); * x=1...0 * ~~~ * * \param [in] t unsigned long long type of value stored in t * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in unsigned long long type */ __STATIC_FORCEINLINE unsigned long long __RV_DKSMS32_U(unsigned long long t, unsigned long long a, unsigned long long b) { __ASM volatile("dksms32.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for DKSMS32.u ===== */ /* ===== Inline Function Start for DMADA32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DMADA32 ((Two Cross Signed 32x32 with 64-bit Add and Clip to 32-bit) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DMADA32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Do two cross signed 32x32 and add the signed multiplication results to q63, then clip the q63 result to q31 , the final results * are written into Rd. * * **Description**:\n * For the `DMADA32` instruction, it multiplies the top 32-bit element in Rs1 with the bottom 32-bit element in Rs2 and * then adds the result to the result of multiplying the bottom 32-bit element in Rs1 with the top 32-bit element in Rs2, then * clip the q63 result to q31. 
* * **Operations**:\n * ~~~ * res = (q31_t)((((q63_t) Rd.w[0] << 32) + (q63_t)Rs1.w[0] s* Rs2.w[1] + (q63_t)Rs1.w[1] s* Rs2.w[0]) s>> 32); * rd = res; * ~~~ * * \param [in] t long long type of value stored in t * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in long type */ __STATIC_FORCEINLINE long __RV_DMADA32(long long t, unsigned long long a, unsigned long long b) { __ASM volatile("dmada32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return (long)t; } /* ===== Inline Function End for DMADA32 ===== */ /* ===== Inline Function Start for DSMALBB ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DSMALBB (Signed Multiply Bottom Halfs & Add 64-bit) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DSMALBB Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the signed 16-bit content of the 32-bit elements of a register with the 16-bit content of the corresponding 32-bit * elements of another register and add the results with a 64-bit value of an even/odd pair of registers. The addition result * is written back to the register-pair. * * DSMALBB: rt pair + bottom*bottom (all 32-bit elements) * * **Description**:\n * For the `DSMALBB` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit content of Rs2.The * multiplication results are added with the 64-bit value of Rd. The 64-bit addition result is written back to Rd. 
* * **Operations**:\n * ~~~ * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[0]; * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[0]; * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]); * ~~~ * * \param [in] t long long type of value stored in t * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_DSMALBB(long long t, unsigned long long a, unsigned long long b) { __ASM volatile("dsmalbb %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for DSMALBB ===== */ /* ===== Inline Function Start for DSMALBT ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DSMALBT (Signed Multiply Bottom Half & Top Half & Add 64-bit) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DSMALBT Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the signed 16-bit content of the 32-bit elements of a register with the 16-bit content of the corresponding 32-bit * elements of another register and add the results with a 64-bit value of an even/odd pair of registers. The addition result * is written back to the register-pair. * * DSMALBT: rt pair + bottom*top (all 32-bit elements) * * **Description**:\n * For the `DSMALBT` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with the top 16-bit * content of the 32-bit elements of Rs2. * The multiplication results are added with the 64-bit value of Rd. The 64-bit addition result is written back to Rd. 
The * 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed integers * * **Operations**:\n * ~~~ * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[1]; * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[1]; * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]); * ~~~ * * \param [in] t long long type of value stored in t * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_DSMALBT(long long t, unsigned long long a, unsigned long long b) { __ASM volatile("dsmalbt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for DSMALBT ===== */ /* ===== Inline Function Start for DSMALTT ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DSMALTT (Signed Multiply Top Half & Add 64-bit) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DSMALTT Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the signed 16-bit content of the 32-bit elements of a register with the 16-bit content of the corresponding 32-bit * elements of another register and add the results with a 64-bit value of an even/odd pair of registers. The addition result * is written back to the register-pair. * * DSMALTT: DSMALTT rt pair + top*top (all 32-bit elements) * * **Description**:\n * For the `DSMALTT` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit * content of the 32-bit elements of Rs2. * The multiplication results are added with the 64-bit value of Rd. The 64-bit addition result is written back to Rd. The * 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed integers. 
* * **Operations**:\n * ~~~ * Mres[0][31:0] = Rs1.W[0].H[1] * Rs2.W[0].H[1]; * Mres[1][31:0] = Rs1.W[1].H[1] * Rs2.W[1].H[1]; * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]); * ~~~ * * \param [in] t long long type of value stored in t * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_DSMALTT(long long t, unsigned long long a, unsigned long long b) { __ASM volatile("dsmaltt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for DSMALTT ===== */ /* ===== Inline Function Start for DKMABB32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DKMABB32 (Saturating Signed Multiply Bottom Words & Add) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DKMABB32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the signed 32-bit element in a register with the 32-bit element in another register and add the result to the content * of 64-bit data in the third register. The addition result may besaturated and is written to the third register. * * DKMABB32: rd + bottom*bottom * * **Description**:\n * For the `DKMABB32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit element in Rs2 * The multiplication result is added to the content of 64-bit data in Rd. If the addition result is beyond the Q63 number range * (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The result after saturation is written to Rd. * The 32-bit contents of Rs1 and Rs2 are treated as signed integers. 
* * **Operations**:\n * ~~~ * res = Rd + (Rs1.W[0] * Rs2.W[0]); * if (res > (2^63)-1) { * res = (2^63)-1; * OV = 1; * } else if (res < -2^63) { * res = -2^63; * OV = 1; * } * Rd = res; * ~~~ * * \param [in] t long long type of value stored in t * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_DKMABB32(long long t, unsigned long long a, unsigned long long b) { __ASM volatile("dkmabb32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for DKMABB32 ===== */ /* ===== Inline Function Start for DKMABT32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DKMABT32 (Saturating Signed Multiply Bottom & Top Words & Add) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DKMABT32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the signed 32-bit element in a register with the 32-bit element in another register and add the result to the content * of 64-bit data in the third register. The addition result may be saturated and is written to the third register. * * DKMABT32: rd + bottom*top * * **Description**:\n * For the `DKMABT32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit element in Rs2 * The multiplication result is added to the content of 64-bit data in Rd. If the addition result is beyond the Q63 number range * (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The result after saturation is written to Rd. * The 32-bit contents of Rs1 and Rs2 are treated as signed integers. 
* * **Operations**:\n * ~~~ * res = Rd + (Rs1.W[0] * Rs2.W[1]); * if (res > (2^63)-1) { * res = (2^63)-1; * OV = 1; * } else if (res < -2^63) { * res = -2^63; * OV = 1; * } * Rd = res; * ~~~ * * \param [in] t long long type of value stored in t * \param [in] a unsigned long long type of value stored in a * \param [in] b unsigned long long type of value stored in b * \return value stored in long long type */ __STATIC_FORCEINLINE long long __RV_DKMABT32(long long t, unsigned long long a, unsigned long long b) { __ASM volatile("dkmabt32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); return t; } /* ===== Inline Function End for DKMABT32 ===== */ /* ===== Inline Function Start for DKMATT32 ===== */ /** * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 * \brief DKMATT32 (Saturating Signed Multiply Bottom & Top Words & Add) * \details * **Type**: SIMD * * **Syntax**:\n * ~~~ * DKMATT32 Rd, Rs1, Rs2 * ~~~ * * **Purpose**:\n * Multiply the signed 32-bit element in a register with the 32-bit element in another register and add the result to the content * of 64-bit data in the third register. The addition result may be saturated and is written to the third register. * * DKMATT32: rd + top*top * * **Description**:\n * For the `DKMATT32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit element in Rs2 * The multiplication result is added to the content of 64-bit data in Rd. If the addition result is beyond the Q63 number range * (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The result after saturation is written to Rd. * The 32-bit contents of Rs1 and Rs2 are treated as signed integers. 
*
 * **Operations**:\n
 * ~~~
 * res = Rd + (Rs1.W[1] * Rs2.W[1]);
 * if (res > (2^63)-1) {
 *   res = (2^63)-1;
 *   OV = 1;
 * } else if (res < -2^63) {
 *   res = -2^63;
 *   OV = 1;
 * }
 * Rd = res;
 * ~~~
 *
 * \param [in]  t    long long type of value stored in t
 * \param [in]  a    unsigned long long type of value stored in a
 * \param [in]  b    unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DKMATT32(long long t, unsigned long long a, unsigned long long b)
{
    /* t is the "+r" operand: it is read as the accumulator and overwritten with the result. */
    __ASM volatile("dkmatt32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for DKMATT32 ===== */

#endif /* __RISCV_XLEN == 32 */

#elif defined (__ICCRISCV__)

/* IAR toolchain: pull in the intrinsic header that matches the register width. */
#if __riscv_xlen == 32
#include "iar_nds32_intrinsic.h"
#elif __riscv_xlen == 64
#include "iar_nds64_intrinsic.h"
#else
#error "Unexpected RISC-V XLEN size."
#endif /* __riscv_xlen == 32 */

/* Save the current language mode and enable IAR extensions for the code below. */
#pragma language=save
#pragma language=extended

// Map the __RV_xxx intrinsic names onto the compatible __nds__xxx intrinsics supplied by IAR
#define __RV_CLROV __nds__clrov
#define __RV_RDOV __nds__rdov
#define __RV_ADD8 __nds__add8
#define __RV_SUB8 __nds__sub8
#define __RV_ADD16 __nds__add16
#define __RV_SUB16 __nds__sub16
#define __RV_ADD64 __nds__add64
#define __RV_SUB64 __nds__sub64
#define __RV_RADD8 __nds__radd8
#define __RV_RSUB8 __nds__rsub8
#define __RV_RADD16 __nds__radd16
#define __RV_RSUB16 __nds__rsub16
#define __RV_RADD64 __nds__radd64
#define __RV_RSUB64 __nds__rsub64
#define __RV_RADDW __nds__raddw
#define __RV_RSUBW __nds__rsubw
#define __RV_URADD8 __nds__uradd8
#define __RV_URSUB8 __nds__ursub8
#define __RV_URADD16 __nds__uradd16
#define __RV_URSUB16 __nds__ursub16
#define __RV_URADD64 __nds__uradd64
#define __RV_URSUB64 __nds__ursub64
#define __RV_URADDW __nds__uraddw
#define __RV_URSUBW __nds__ursubw
#define __RV_KADD8 __nds__kadd8
#define __RV_KSUB8 __nds__ksub8
#define __RV_KADD16 __nds__kadd16
#define __RV_KSUB16 __nds__ksub16
#define __RV_KADD64 __nds__kadd64
#define __RV_KSUB64
__nds__ksub64 #define __RV_KADDH __nds__kaddh #define __RV_KSUBH __nds__ksubh #define __RV_KADDW __nds__kaddw #define __RV_KSUBW __nds__ksubw #define __RV_UKADD8 __nds__ukadd8 #define __RV_UKSUB8 __nds__uksub8 #define __RV_UKADD16 __nds__ukadd16 #define __RV_UKSUB16 __nds__uksub16 #define __RV_UKADD64 __nds__ukadd64 #define __RV_UKSUB64 __nds__uksub64 #define __RV_UKADDH __nds__ukaddh #define __RV_UKSUBH __nds__uksubh #define __RV_UKADDW __nds__ukaddw #define __RV_UKSUBW __nds__uksubw #define __RV_CRAS16 __nds__cras16 #define __RV_CRSA16 __nds__crsa16 #define __RV_RCRAS16 __nds__rcras16 #define __RV_RCRSA16 __nds__rcrsa16 #define __RV_URCRAS16 __nds__urcras16 #define __RV_URCRSA16 __nds__urcrsa16 #define __RV_KCRAS16 __nds__kcras16 #define __RV_KCRSA16 __nds__kcrsa16 #define __RV_UKCRAS16 __nds__ukcras16 #define __RV_UKCRSA16 __nds__ukcrsa16 #define __RV_SRA8 __nds__sra8 #define __RV_SRAI8 __nds__sra8 #define __RV_SRA16 __nds__sra16 #define __RV_SRAI16 __nds__sra16 #define __RV_SRL8 __nds__srl8 #define __RV_SRL16 __nds__srl16 #define __RV_SLL8 __nds__sll8 #define __RV_SLL16 __nds__sll16 #define __RV_SRA_U __nds__sra_u #define __RV_SRA8_U __nds__sra8_u #define __RV_SRA16_U __nds__sra16_u #define __RV_SRL8_U __nds__srl8_u #define __RV_SRL16_U __nds__srl16_u #define __RV_KSLL8 __nds__ksll8 #define __RV_KSLL16 __nds__ksll16 #define __RV_KSLLW __nds__ksllw #define __RV_KSLRA8 __nds__kslra8 #define __RV_KSLRA8_U __nds__kslra8_u #define __RV_KSLRA16 __nds__kslra16 #define __RV_KSLRA16_U __nds__kslra16_u #define __RV_KSLRAW __nds__kslraw #define __RV_KSLRAW_U __nds__kslraw_u #define __RV_CMPEQ8 __nds__cmpeq8 #define __RV_CMPEQ16 __nds__cmpeq16 #define __RV_SCMPLE8 __nds__scmple8 #define __RV_SCMPLE16 __nds__scmple16 #define __RV_SCMPLT8 __nds__scmplt8 #define __RV_SCMPLT16 __nds__scmplt16 #define __RV_UCMPLE8 __nds__ucmple8 #define __RV_UCMPLE16 __nds__ucmple16 #define __RV_UCMPLT8 __nds__ucmplt8 #define __RV_UCMPLT16 __nds__ucmplt16 #define __RV_SMUL8 __nds__smul8 #define 
__RV_UMUL8 __nds__umul8 #define __RV_SMUL16 __nds__smul16 #define __RV_UMUL16 __nds__umul16 #define __RV_SMULX8 __nds__smulx8 #define __RV_UMULX8 __nds__umulx8 #define __RV_SMULX16 __nds__smulx16 #define __RV_UMULX16 __nds__umulx16 #define __RV_KHM8 __nds__khm8 #define __RV_KHMX8 __nds__khmx8 #define __RV_KHM16 __nds__khm16 #define __RV_KHMX16 __nds__khmx16 #define __RV_MULR64 __nds__mulr64 #define __RV_MULSR64 __nds__mulsr64 #define __RV_SMMUL __nds__smmul #define __RV_SMMUL_U __nds__smmul_u #define __RV_WEXT __nds__wext #define __RV_SUNPKD810 __nds__sunpkd810 #define __RV_SUNPKD820 __nds__sunpkd820 #define __RV_SUNPKD830 __nds__sunpkd830 #define __RV_SUNPKD831 __nds__sunpkd831 #define __RV_SUNPKD832 __nds__sunpkd832 #define __RV_ZUNPKD810 __nds__zunpkd810 #define __RV_ZUNPKD820 __nds__zunpkd820 #define __RV_ZUNPKD830 __nds__zunpkd830 #define __RV_ZUNPKD831 __nds__zunpkd831 #define __RV_ZUNPKD832 __nds__zunpkd832 #define __RV_PKBB16 __nds__pkbb16 #define __RV_PKBT16 __nds__pkbt16 #define __RV_PKTT16 __nds__pktt16 #define __RV_PKTB16 __nds__pktb16 #define __RV_KMMAC __nds__kmmac #define __RV_KMMAC_U __nds__kmmac_u #define __RV_KMMSB __nds__kmmsb #define __RV_KMMSB_U __nds__kmmsb_u #define __RV_KWMMUL __nds__kwmmul #define __RV_KWMMUL_U __nds__kwmmul_u #define __RV_SMMWB __nds__smmwb #define __RV_SMMWB_U __nds__smmwb_u #define __RV_SMMWT __nds__smmwt #define __RV_SMMWT_U __nds__smmwt_u #define __RV_KMMAWB __nds__kmmawb #define __RV_KMMAWB_U __nds__kmmawb_u #define __RV_KMMAWT __nds__kmmawt #define __RV_KMMAWT_U __nds__kmmawt_u #define __RV_KMMWB2 __nds__kmmwb2 #define __RV_KMMWB2_U __nds__kmmwb2_u #define __RV_KMMWT2 __nds__kmmwt2 #define __RV_KMMWT2_U __nds__kmmwt2_u #define __RV_KMMAWB2 __nds__kmmawb2 #define __RV_KMMAWB2_U __nds__kmmawb2_u #define __RV_KMMAWT2 __nds__kmmawt2 #define __RV_KMMAWT2_U __nds__kmmawt2_u #define __RV_SMBB16 __nds__smbb16 #define __RV_SMBT16 __nds__smbt16 #define __RV_SMTT16 __nds__smtt16 #define __RV_KMDA __nds__kmda #define __RV_KMXDA 
__nds__kmxda #define __RV_SMDS __nds__smds #define __RV_SMDRS __nds__smdrs #define __RV_SMXDS __nds__smxds #define __RV_KMABB __nds__kmabb #define __RV_KMABT __nds__kmabt #define __RV_KMATT __nds__kmatt #define __RV_KMADA __nds__kmada #define __RV_KMAXDA __nds__kmaxda #define __RV_KMADS __nds__kmads #define __RV_KMADRS __nds__kmadrs #define __RV_KMAXDS __nds__kmaxds #define __RV_KMSDA __nds__kmsda #define __RV_KMSXDA __nds__kmsxda #define __RV_SMAL __nds__smal #define __RV_SMAQA __nds__smaqa #define __RV_UMAQA __nds__umaqa #define __RV_SMAQA_SU __nds__smaqa_su #define __RV_SMAR64 __nds__smar64 #define __RV_SMSR64 __nds__smsr64 #define __RV_UMAR64 __nds__umar64 #define __RV_UMSR64 __nds__umsr64 #define __RV_KMAR64 __nds__kmar64 #define __RV_KMSR64 __nds__kmsr64 #define __RV_UKMAR64 __nds__ukmar64 #define __RV_UKMSR64 __nds__ukmsr64 #define __RV_SMALBB __nds__smalbb #define __RV_SMALBT __nds__smalbt #define __RV_SMALTT __nds__smaltt #define __RV_SMALDA __nds__smalda #define __RV_SMALXDA __nds__smalxda #define __RV_SMALDS __nds__smalds #define __RV_SMALDRS __nds__smaldrs #define __RV_SMALXDS __nds__smalxds #define __RV_SMSLDA __nds__smslda #define __RV_SMSLXDA __nds__smslxda #define __RV_MINW __nds__minw #define __RV_MAXW __nds__maxw #define __RV_SMIN8 __nds__smin8 #define __RV_SMAX8 __nds__smax8 #define __RV_SMIN16 __nds__smin16 #define __RV_SMAX16 __nds__smax16 #define __RV_UMIN8 __nds__umin8 #define __RV_UMAX8 __nds__umax8 #define __RV_UMIN16 __nds__umin16 #define __RV_UMAX16 __nds__umax16 #define __RV_KABS8 __nds__kabs8 #define __RV_KABS16 __nds__kabs16 #define __RV_KABSW __nds__kabsw #define __RV_SCLIP8 __nds__sclip8 #define __RV_SCLIP16 __nds__sclip16 #define __RV_SCLIP32 __nds__sclip32 #define __RV_UCLIP8 __nds__uclip8 #define __RV_UCLIP16 __nds__uclip16 #define __RV_UCLIP32 __nds__uclip32 #define __RV_CLO8 __nds__clo8 #define __RV_CLO16 __nds__clo16 #define __RV_CLO32 __nds__clo32 #define __RV_CLZ8 __nds__clz8 #define __RV_CLZ16 __nds__clz16 #define __RV_CLZ32 
__nds__clz32 #define __RV_CLRS8 __nds__clrs8 #define __RV_CLRS16 __nds__clrs16 #define __RV_CLRS32 __nds__clrs32 #define __RV_SWAP8 __nds__swap8 #define __RV_SWAP16 __nds__swap16 #define __RV_KHMBB __nds__khmbb #define __RV_KHMBT __nds__khmbt #define __RV_KHMTT __nds__khmtt #define __RV_KDMBB __nds__kdmbb #define __RV_KDMBT __nds__kdmbt #define __RV_KDMTT __nds__kdmtt #define __RV_KDMABB __nds__kdmabb #define __RV_KDMABT __nds__kdmabt #define __RV_KDMATT __nds__kdmatt #define __RV_MADDR32 __nds__maddr32 #define __RV_MSUBR32 __nds__msubr32 #define __RV_PBSAD __nds__pbsad #define __RV_PBSADA __nds__pbsada #define __RV_AVE __nds__ave #define __RV_BITREV __nds__bitrev #define __RV_INSB __nds__insb #if (__riscv_xlen == 64) #define __RV_ADD32 __nds__add32 #define __RV_SUB32 __nds__sub32 #define __RV_RADD32 __nds__radd32 #define __RV_RSUB32 __nds__rsub32 #define __RV_URADD32 __nds__uradd32 #define __RV_URSUB32 __nds__ursub32 #define __RV_KADD32 __nds__kadd32 #define __RV_KSUB32 __nds__ksub32 #define __RV_UKADD32 __nds__ukadd32 #define __RV_UKSUB32 __nds__uksub32 #define __RV_CRAS32 __nds__cras32 #define __RV_CRSA32 __nds__crsa32 #define __RV_RCRAS32 __nds__rcras32 #define __RV_RCRSA32 __nds__rcrsa32 #define __RV_URCRAS32 __nds__urcras32 #define __RV_URCRSA32 __nds__urcrsa32 #define __RV_KCRAS32 __nds__kcras32 #define __RV_KCRSA32 __nds__kcrsa32 #define __RV_UKCRAS32 __nds__ukcras32 #define __RV_UKCRSA32 __nds__ukcrsa32 #define __RV_SRA32 __nds__sra32 #define __RV_SRAI32 __nds__sra32 #define __RV_SRL32 __nds__srl32 #define __RV_SLL32 __nds__sll32 #define __RV_SLLI32 __nds__sll32 #define __RV_SRAW_U __nds__sraw_u #define __RV_SRA32_U __nds__sra32_u #define __RV_SRL32_U __nds__srl32_u #define __RV_KSLL32 __nds__ksll32 #define __RV_KSLRA32 __nds__kslra32 #define __RV_KSLRA32_U __nds__kslra32_u #define __RV_SMBB32 __nds__smbb32 #define __RV_SMBT32 __nds__smbt32 #define __RV_SMTT32 __nds__smtt32 #define __RV_PKBB32 __nds__pkbb32 #define __RV_PKBT32 __nds__pkbt32 #define 
__RV_PKTT32 __nds__pktt32 #define __RV_PKTB32 __nds__pktb32 #define __RV_SMIN32 __nds__smin32 #define __RV_SMAX32 __nds__smax32 #define __RV_UMIN32 __nds__umin32 #define __RV_UMAX32 __nds__umax32 #define __RV_KABS32 __nds__kabs32 #define __RV_KHMBB16 __nds__khmbb16 #define __RV_KHMBT16 __nds__khmbt16 #define __RV_KHMTT16 __nds__khmtt16 #define __RV_KDMBB16 __nds__kdmbb16 #define __RV_KDMBT16 __nds__kdmbt16 #define __RV_KDMTT16 __nds__kdmtt16 #define __RV_KDMABB16 __nds__kdmabb16 #define __RV_KDMABT16 __nds__kdmabt16 #define __RV_KDMATT16 __nds__kdmatt16 #define __RV_KMABB32 __nds__kmabb32 #define __RV_KMABT32 __nds__kmabt32 #define __RV_KMATT32 __nds__kmatt32 #define __RV_KMDA32 __nds__kmda32 #define __RV_KMXDA32 __nds__kmxda32 #define __RV_KMADA32 __nds__kmada32 #define __RV_KMAXDA32 __nds__kmaxda32 #define __RV_KMADS32 __nds__kmads32 #define __RV_KMADRS32 __nds__kmadrs32 #define __RV_KMAXDS32 __nds__kmaxds32 #define __RV_KMSDA32 __nds__kmsda32 #define __RV_KMSXDA32 __nds__kmsxda32 #define __RV_SMDS32 __nds__smds32 #define __RV_SMDRS32 __nds__smdrs32 #define __RV_SMXDS32 __nds__smxds32 #endif /* __riscv_xlen == 64 */ // For now, the P-extention version of IAR IDE is 0.5.0, but Nuclei's supports 0.5.4 // so Nuclei supplies a workround to add custom instructions of those not natively // supported by the IAR Assembler. Note that __RV_BPICK remains to be implemented in future. 
// And we only implement Xxldsp Nuclei custom instruction set, bpick not implemented, expdxx // implemented in c, not via .insn variant #pragma inline=forced_no_body unsigned long __RV_STAS16(unsigned long a, unsigned long b) { unsigned long r; __asm(".insn r 0x7F, 0x2, 0x7A, %0,%1,%2":"=r"(r) : "r"(a), "r"(b) ); return r; } #pragma inline=forced_no_body unsigned long __RV_RSTAS16(unsigned long a, unsigned long b) { unsigned long r; __asm(".insn r 0x7F, 0x2, 0x5A, %0,%1,%2":"=r"(r) : "r"(a), "r"(b) ); return r; } #pragma inline=forced_no_body unsigned long __RV_KSTAS16(unsigned long a, unsigned long b) { unsigned long r; __asm(".insn r 0x7F, 0x2, 0x62, %0,%1,%2":"=r"(r) : "r"(a), "r"(b) ); return r; } #pragma inline=forced_no_body unsigned long __RV_URSTAS16(unsigned long a, unsigned long b) { unsigned long r; __asm(".insn r 0x7F, 0x2, 0x6A, %0,%1,%2":"=r"(r) : "r"(a), "r"(b) ); return r; } #pragma inline=forced_no_body unsigned long __RV_UKSTAS16(unsigned long a, unsigned long b) { unsigned long r; __asm(".insn r 0x7F, 0x2, 0x72, %0,%1,%2":"=r"(r) : "r"(a), "r"(b) ); return r; } #pragma inline=forced_no_body unsigned long __RV_STSA16(unsigned long a, unsigned long b) { unsigned long r; __asm(".insn r 0x7F, 0x2, 0x7B, %0,%1,%2":"=r"(r) : "r"(a), "r"(b) ); return r; } #pragma inline=forced_no_body unsigned long __RV_RSTSA16(unsigned long a, unsigned long b) { unsigned long r; __asm(".insn r 0x7F, 0x2, 0x5B, %0,%1,%2":"=r"(r) : "r"(a), "r"(b) ); return r; } #pragma inline=forced_no_body unsigned long __RV_KSTSA16(unsigned long a, unsigned long b) { unsigned long r; __asm(".insn r 0x7F, 0x2, 0x63, %0,%1,%2":"=r"(r) : "r"(a), "r"(b) ); return r; } #pragma inline=forced_no_body unsigned long __RV_URSTSA16(unsigned long a, unsigned long b) { unsigned long r; __asm(".insn r 0x7F, 0x2, 0x6B, %0,%1,%2":"=r"(r) : "r"(a), "r"(b) ); return r; } #pragma inline=forced_no_body unsigned long __RV_UKSTSA16(unsigned long a, unsigned long b) { unsigned long r; __asm(".insn r 0x7F, 
0x2, 0x73, %0,%1,%2":"=r"(r) : "r"(a), "r"(b) ); return r; } // #pragma inline=forced_no_body // unsigned long __RV_BPICK(unsigned long a, unsigned long b, unsigned long c) { // TODO: remains to be done // } // RV64 only #pragma inline=forced_no_body unsigned long __RV_STAS32(unsigned long a, unsigned long b) { unsigned long r; __asm(".insn r 0x7F, 0x2, 0x78, %0,%1,%2":"=r"(r) : "r"(a), "r"(b) ); return r; } #pragma inline=forced_no_body unsigned long __RV_RSTAS32(unsigned long a, unsigned long b) { unsigned long r; __asm(".insn r 0x7F, 0x2, 0x58, %0,%1,%2":"=r"(r) : "r"(a), "r"(b) ); return r; } #pragma inline=forced_no_body unsigned long __RV_KSTAS32(unsigned long a, unsigned long b) { unsigned long r; __asm(".insn r 0x7F, 0x2, 0x60, %0,%1,%2":"=r"(r) : "r"(a), "r"(b) ); return r; } #pragma inline=forced_no_body unsigned long __RV_URSTAS32(unsigned long a, unsigned long b) { unsigned long r; __asm(".insn r 0x7F, 0x2, 0x68, %0,%1,%2":"=r"(r) : "r"(a), "r"(b) ); return r; } #pragma inline=forced_no_body unsigned long __RV_UKSTAS32(unsigned long a, unsigned long b) { unsigned long r; __asm(".insn r 0x7F, 0x2, 0x70, %0,%1,%2":"=r"(r) : "r"(a), "r"(b) ); return r; } #pragma inline=forced_no_body unsigned long __RV_STSA32(unsigned long a, unsigned long b) { unsigned long r; __asm(".insn r 0x7F, 0x2, 0x79, %0,%1,%2":"=r"(r) : "r"(a), "r"(b) ); return r; } #pragma inline=forced_no_body unsigned long __RV_RSTSA32(unsigned long a, unsigned long b) { unsigned long r; __asm(".insn r 0x7F, 0x2, 0x59, %0,%1,%2":"=r"(r) : "r"(a), "r"(b) ); return r; } #pragma inline=forced_no_body unsigned long __RV_KSTSA32(unsigned long a, unsigned long b) { unsigned long r; __asm(".insn r 0x7F, 0x2, 0x61, %0,%1,%2":"=r"(r) : "r"(a), "r"(b) ); return r; } #pragma inline=forced_no_body unsigned long __RV_URSTSA32(unsigned long a, unsigned long b) { unsigned long r; __asm(".insn r 0x7F, 0x2, 0x69, %0,%1,%2":"=r"(r) : "r"(a), "r"(b) ); return r; } #pragma inline=forced_no_body unsigned long 
__RV_UKSTSA32(unsigned long a, unsigned long b) { unsigned long r; __asm(".insn r 0x7F, 0x2, 0x71, %0,%1,%2":"=r"(r) : "r"(a), "r"(b) ); return r; } #pragma inline=forced_no_body unsigned long __RV_EXPD80(unsigned long a) { return __EXPD_BYTE((uint8_t)(a & 0xff)); } #pragma inline=forced_no_body unsigned long __RV_EXPD81(unsigned long a) { return __EXPD_BYTE((uint8_t)((a >> 8) & 0xff)); } #pragma inline=forced_no_body unsigned long __RV_EXPD82(unsigned long a) { return __EXPD_BYTE((uint8_t)((a >> 16) & 0xff)); } #pragma inline=forced_no_body unsigned long __RV_EXPD83(unsigned long a) { return __EXPD_BYTE((uint8_t)((a >> 24) & 0xff)); } #if __RISCV_XLEN == 64 // RV64 only #pragma inline=forced_no_body unsigned long __RV_EXPD84(unsigned long a) { return __EXPD_BYTE((uint8_t)((a >> 32) & 0xff)); } #pragma inline=forced_no_body unsigned long __RV_EXPD85(unsigned long a) { return __EXPD_BYTE((uint8_t)((a >> 40) & 0xff)); } #pragma inline=forced_no_body unsigned long __RV_EXPD86(unsigned long a) { return __EXPD_BYTE((uint8_t)((a >> 48) & 0xff)); } #pragma inline=forced_no_body unsigned long __RV_EXPD87(unsigned long a) { return __EXPD_BYTE((uint8_t)((a >> 56) & 0xff)); } #endif #pragma language=restore #else #error Unknown compiler #endif /* __ICCRISCV__ */ /* XXXXX ARM Compatiable SIMD API XXXXX */ /** \brief Q setting quad 8-bit saturating addition. */ #define __QADD8(x, y) __RV_KADD8(x, y) /** \brief Q setting quad 8-bit saturating subtract. */ #define __QSUB8(x, y) __RV_KSUB8((x), (y)) /** \brief Q setting dual 16-bit saturating addition. */ #define __QADD16(x, y) __RV_KADD16((x), (y)) /** \brief Dual 16-bit signed addition with halved results. */ #define __SHADD16(x, y) __RV_RADD16((x), (y)) /** \brief Q setting dual 16-bit saturating subtract. */ #define __QSUB16(x, y) __RV_KSUB16((x), (y)) /** \brief Dual 16-bit signed subtraction with halved results. */ #define __SHSUB16(x, y) __RV_RSUB16((x), (y)) /** \brief Q setting dual 16-bit add and subtract with exchange. 
*/ #define __QASX(x, y) __RV_KCRAS16((x), (y)) /** \brief Dual 16-bit signed addition and subtraction with halved results.*/ #define __SHASX(x, y) __RV_RCRAS16((x), (y)) /** \brief Q setting dual 16-bit subtract and add with exchange. */ #define __QSAX(x, y) __RV_KCRSA16((x), (y)) /** \brief Dual 16-bit signed subtraction and addition with halved results.*/ #define __SHSAX(x, y) __RV_RCRSA16((x), (y)) /** \brief Dual 16-bit signed multiply with exchange returning difference. */ #define __SMUSDX(x, y) __RV_SMXDS((y), (x)) /** \brief Q setting sum of dual 16-bit signed multiply with exchange. */ __STATIC_FORCEINLINE long __SMUADX (unsigned long op1, unsigned long op2) { return __RV_KMXDA(op1, op2); } /** \brief Q setting saturating add. */ #define __QADD(x, y) __RV_KADDW((x), (y)) /** \brief Q setting saturating subtract. */ #define __QSUB(x, y) __RV_KSUBW((x), (y)) /** \brief Q setting dual 16-bit signed multiply with single 32-bit accumulator. */ __STATIC_FORCEINLINE long __SMLAD(unsigned long op1, unsigned long op2, long acc) { return __RV_KMADA(acc, op1, op2); } /** \brief Q setting pre-exchanged dual 16-bit signed multiply with single 32-bit accumulator. */ __STATIC_FORCEINLINE long __SMLADX(unsigned long op1, unsigned long op2, long acc) { return __RV_KMAXDA(acc, op1, op2); } /** \brief Q setting dual 16-bit signed multiply with exchange subtract with 32-bit accumulate. */ __STATIC_FORCEINLINE long __SMLSDX(unsigned long op1, unsigned long op2, long acc) { return (acc - __RV_SMXDS(op1, op2)); } /** \brief Dual 16-bit signed multiply with single 64-bit accumulator. */ __STATIC_FORCEINLINE long long __SMLALD(unsigned long op1, unsigned long op2, long long acc) { return __RV_SMALDA(acc, op1, op2); } /** \brief Dual 16-bit signed multiply with exchange with single 64-bit accumulator. 
*/ __STATIC_FORCEINLINE long long __SMLALDX(unsigned long op1, unsigned long op2, long long acc) { return __RV_SMALXDA(acc, op1, op2); } /** \brief Q setting sum of dual 16-bit signed multiply. */ __STATIC_FORCEINLINE long __SMUAD(unsigned long op1, unsigned long op2) { return __RV_KMDA(op1, op2); } /** \brief Dual 16-bit signed multiply returning difference. */ __STATIC_FORCEINLINE long __SMUSD(unsigned long op1, unsigned long op2) { return __RV_SMDRS(op1, op2); } /** \brief Dual extract 8-bits and sign extend each to 16-bits. */ #define __SXTB16(x) __RV_SUNPKD820(x) /** \brief Dual extracted 8-bit to 16-bit signed addition. TODO Need test */ __STATIC_FORCEINLINE unsigned long __SXTAB16(unsigned long op1, unsigned long op2) { return __RV_ADD16(op1, __RV_SUNPKD820(op2)); } #define __SXTAB16_RORn(ARG1, ARG2, ROTATE) __SXTAB16(ARG1, __ROR(ARG2, ROTATE)) /** \brief 32-bit signed multiply with 32-bit truncated accumulator. */ __STATIC_FORCEINLINE long __SMMLA(long op1, long op2, long acc) { long mul; mul = __RV_SMMUL(op1, op2); return (acc + mul); } #define __DKHM8 __RV_DKHM8 #define __DKHM16 __RV_DKHM16 #define __DKSUB16 __RV_DKSUB16 #define __SMAQA __RV_SMAQA #define __MULSR64 __RV_MULSR64 #define __DQADD8 __RV_DKADD8 #define __DQSUB8 __RV_DKSUB8 #define __DKADD16 __RV_DKADD16 #define __PKBB16 __RV_PKBB16 #define __DKSLRA16 __RV_DKSLRA16 #define __DKSLRA8 __RV_DKSLRA8 #define __KABSW __RV_KABSW #define __DKABS8 __RV_DKABS8 #define __DKABS16 __RV_DKABS16 #define __SMALDA __RV_SMALDA #define __SMSLDA __RV_SMSLDA #define __SMALBB __RV_SMALBB #define __SUB64 __RV_SUB64 #define __ADD64 __RV_ADD64 #define __SMBB16 __RV_SMBB16 #define __SMBT16 __RV_SMBT16 #define __SMTT16 __RV_SMTT16 #define __EXPD80 __RV_EXPD80 #define __SMAX8 __RV_SMAX8 #define __SMAX16 __RV_SMAX16 #define __PKTT16 __RV_PKTT16 #define __KADD16 __RV_KADD16 #define __SADD16 __RV_ADD16 #define __SSUB8 __RV_KSUB8 #define __SADD8 __RV_KADD8 #define __USAT16 __RV_UCLIP16 #define __SMALTT __RV_SMALTT /** \brief 
Halfword packing instruction. Combines bits[15:0] of val1 with bits[31:16] of val2 levitated with the val3. */ #define __PKHBT(ARG1, ARG2, ARG3) ((ARG3 == 0) ? __RV_PKTB16(ARG2, ARG1) : \ (ARG3 == 16) ? __RV_PKBB16(ARG2, ARG1) : \ (((((uint32_t)(ARG1)) ) & 0x0000FFFFUL) | \ ((((uint32_t)(ARG2)) << (ARG3)) & 0xFFFF0000UL))) /** \brief Halfword packing instruction. Combines bits[31:16] of val1 with bits[15:0] of val2 right-shifted with the val3. */ #define __PKHTB(ARG1, ARG2, ARG3) ((ARG3 == 0) ? __RV_PKTB16(ARG1, ARG2) : \ (ARG3 == 16) ? __RV_PKTT16(ARG1, ARG2) : \ (((((uint32_t)(ARG1)) ) & 0xFFFF0000UL) | \ ((((uint32_t)(ARG2)) >> (ARG3)) & 0x0000FFFFUL))) #if __RISCV_XLEN == 64 /** \brief Halfword packing instruction. Combines bits[15:0] of val1 with bits[31:16] of val2 levitated with the val3, and also combines the [47:32] of val1 with bits[63:48] of val2 with the val3, finally pack the two new 32-bits to 64-bit. */ #define __PKHBT64(ARG1, ARG2, ARG3) ((ARG3 == 0) ? __RV_PKTB16(ARG2, ARG1) : \ (ARG3 == 16) ? __RV_PKBB16(ARG2, ARG1) : \ ((int64_t)((((uint32_t)((uint64_t)ARG1 >> 32)) & 0x0000FFFFUL) | \ ((((uint32_t)((uint64_t)ARG2 >> 32)) << (ARG3)) & 0xFFFF0000UL)) << 32) | \ ((int64_t)(((((uint32_t)(ARG1))) & 0x0000FFFFUL) | \ ((((uint32_t)(ARG2)) << (ARG3)) & 0xFFFF0000UL)) & 0xFFFFFFFFUL)) /** \brief Halfword packing instruction. Combines bits[31:16] of val1 with bits[15:0] of val2 right-shifted with the val3, and also combines bits [63:48] of val1 with bits [47:32] of val2 with the val3, finally pack the two new 32bits to 64bits. */ #define __PKHTB64(ARG1, ARG2, ARG3) ((ARG3 == 0) ? __RV_PKTB16(ARG1, ARG2) : \ (ARG3 == 16) ? __RV_PKTT16(ARG1, ARG2) : \ ((uint64_t)(((uint32_t)((uint64_t)ARG1 >> 32) & 0xFFFF0000UL) | \ ((((uint32_t)((uint64_t)ARG2 >> 32)) >> (ARG3)) & 0x0000FFFFUL)) << 32) | \ ((uint64_t)(((uint32_t)(ARG1) & 0xFFFF0000UL) | \ ((((uint32_t)(ARG2)) >> (ARG3)) & 0x0000FFFFUL)) & 0xFFFFFFFFUL)) #else /** \brief Halfword packing instruction. 
Combines bits[15:0] of val1 with bits[31:16] of val2 levitated with the val3, and also combines the [47:32] of val1 with bits[63:48] of val2 with the val3, finally pack the two new 32-bits to 64-bit. */ #define __PKHBT64(ARG1, ARG2, ARG3) ((ARG3 == 0) ? __RV_DPKTB16(ARG2, ARG1) : \ (ARG3 == 16) ? __RV_DPKBB16(ARG2, ARG1) : \ ((int64_t)((((uint32_t)((uint64_t)ARG1 >> 32)) & 0x0000FFFFUL) | \ ((((uint32_t)((uint64_t)ARG2 >> 32)) << (ARG3)) & 0xFFFF0000UL)) << 32) | \ ((int64_t)(((((uint32_t)(ARG1))) & 0x0000FFFFUL) | \ ((((uint32_t)(ARG2)) << (ARG3)) & 0xFFFF0000UL)) & 0xFFFFFFFFUL)) /** \brief Halfword packing instruction. Combines bits[31:16] of val1 with bits[15:0] of val2 right-shifted with the val3, and also combines bits [63:48] of val1 with bits [47:32] of val2 with the val3, finally pack the two new 32bits to 64bits. */ #define __PKHTB64(ARG1, ARG2, ARG3) ((ARG3 == 0) ? __RV_DPKTB16(ARG1, ARG2) : \ (ARG3 == 16) ? __RV_DPKTT16(ARG1, ARG2) : \ ((uint64_t)(((uint32_t)((uint64_t)ARG1 >> 32) & 0xFFFF0000UL) | \ ((((uint32_t)((uint64_t)ARG2 >> 32)) >> (ARG3)) & 0x0000FFFFUL)) << 32) | \ ((uint64_t)(((uint32_t)(ARG1) & 0xFFFF0000UL) | \ ((((uint32_t)(ARG2)) >> (ARG3)) & 0x0000FFFFUL)) & 0xFFFFFFFFUL)) #endif /* __RISCV_XLEN == 64 */ /** first rotate then extract. This is more suitable for arm compiler for it can rotate and extract in one command*/ #define __SXTB16_RORn(ARG1, ARG2) __RV_SUNPKD820(__ROR(ARG1, ARG2)) #endif /* defined(__DSP_PRESENT) && (__DSP_PRESENT == 1) */ #ifdef __cplusplus } #endif #endif /* __CORE_FEATURE_DSP__ */