Nuclei-Software
/
nuclei-sdk
зеркало из https://github-proxy.rt-thread.io/Nuclei-Software/nuclei-sdk.git


			
							1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007
01702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275
12761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775
17761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242224322442245224622472248224922502251225222532254225522562257225822592260226122622263226422652266226722682269227022712272227322742275
22762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715271627172718271927202721272227232724272527262727272827292730273127322733273427352736273727382739274027412742274327442745274627472748274927502751275227532754275527562757275827592760276127622763276427652766276727682769277027712772277327742775
27762777277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843284428452846284728482849285028512852285328542855285628572858285928602861286228632864286528662867286828692870287128722873287428752876287728782879288028812882288328842885288628872888288928902891289228932894289528962897289828992900290129022903290429052906290729082909291029112912291329142915291629172918291929202921292229232924292529262927292829292930293129322933293429352936293729382939294029412942294329442945294629472948294929502951295229532954295529562957295829592960296129622963296429652966296729682969297029712972297329742975297629772978297929802981298229832984298529862987298829892990299129922993299429952996299729982999300030013002300330043005300630073008300930103011301230133014301530163017301830193020302130223023302430253026302730283029303030313032303330343035303630373038303930403041304230433044304530463047304830493050305130523053305430553056305730583059306030613062306330643065306630673068306930703071307230733074307530763077307830793080308130823083308430853086308730883089309030913092309330943095309630973098309931003101310231033104310531063107310831093110311131123113311431153116311731183119312031213122312331243125312631273128312931303131313231333134313531363137313831393140314131423143314431453146314731483149315031513152315331543155315631573158315931603161316231633164316531663167316831693170317131723173317431753176317731783179318031813182318331843185318631873188318931903191319231933194319531963197319831993200320132023203320432053206320732083209321032113212321332143215321632173218321932203221322232233224322532263227322832293230323132323233323432353236323732383239324032413242324332443245324632473248324932503251325232533254325532563257325832593260326132623263326432653266326732683269327032713272327332743275
32763277327832793280328132823283328432853286328732883289329032913292329332943295329632973298329933003301330233033304330533063307330833093310331133123313331433153316331733183319332033213322332333243325332633273328332933303331333233333334333533363337333833393340334133423343334433453346334733483349335033513352335333543355335633573358335933603361336233633364336533663367336833693370337133723373337433753376337733783379338033813382338333843385338633873388338933903391339233933394339533963397339833993400340134023403340434053406340734083409341034113412341334143415341634173418341934203421342234233424342534263427342834293430343134323433343434353436343734383439344034413442344334443445344634473448344934503451345234533454345534563457345834593460346134623463346434653466346734683469347034713472347334743475347634773478347934803481348234833484348534863487348834893490349134923493349434953496349734983499350035013502350335043505350635073508350935103511351235133514351535163517351835193520352135223523352435253526352735283529353035313532353335343535353635373538353935403541354235433544354535463547354835493550355135523553355435553556355735583559356035613562356335643565356635673568356935703571357235733574357535763577357835793580358135823583358435853586358735883589359035913592359335943595359635973598359936003601360236033604360536063607360836093610361136123613361436153616361736183619362036213622362336243625362636273628362936303631363236333634363536363637363836393640364136423643364436453646364736483649365036513652365336543655365636573658365936603661366236633664366536663667366836693670367136723673367436753676367736783679368036813682368336843685368636873688368936903691369236933694369536963697369836993700370137023703370437053706370737083709371037113712371337143715371637173718371937203721372237233724372537263727372837293730373137323733373437353736373737383739374037413742374337443745374637473748374937503751375237533754375537563757375837593760376137623763376437653766376737683769377037713772377337743775
37763777377837793780378137823783378437853786378737883789379037913792379337943795379637973798379938003801380238033804380538063807380838093810381138123813381438153816381738183819382038213822382338243825382638273828382938303831383238333834383538363837383838393840384138423843384438453846384738483849385038513852385338543855385638573858385938603861386238633864386538663867386838693870387138723873387438753876387738783879388038813882388338843885388638873888388938903891389238933894389538963897389838993900390139023903390439053906390739083909391039113912391339143915391639173918391939203921392239233924392539263927392839293930393139323933393439353936393739383939394039413942394339443945394639473948394939503951395239533954395539563957395839593960396139623963396439653966396739683969397039713972397339743975397639773978397939803981398239833984398539863987398839893990399139923993399439953996399739983999400040014002400340044005400640074008400940104011401240134014401540164017401840194020402140224023402440254026402740284029403040314032403340344035403640374038403940404041404240434044404540464047404840494050405140524053405440554056405740584059406040614062406340644065406640674068406940704071407240734074407540764077407840794080408140824083408440854086408740884089409040914092409340944095409640974098409941004101410241034104410541064107410841094110411141124113411441154116411741184119412041214122412341244125412641274128412941304131413241334134413541364137413841394140414141424143414441454146414741484149415041514152415341544155415641574158415941604161416241634164416541664167416841694170417141724173417441754176417741784179418041814182418341844185418641874188418941904191419241934194419541964197419841994200420142024203420442054206420742084209421042114212421342144215421642174218421942204221422242234224422542264227422842294230423142324233423442354236423742384239424042414242424342444245424642474248424942504251425242534254425542564257425842594260426142624263426442654266426742684269427042714272427342744275
42764277427842794280428142824283428442854286428742884289429042914292429342944295429642974298429943004301430243034304430543064307430843094310431143124313431443154316431743184319432043214322432343244325432643274328432943304331433243334334433543364337433843394340434143424343434443454346434743484349435043514352435343544355435643574358435943604361436243634364436543664367436843694370437143724373437443754376437743784379438043814382438343844385438643874388438943904391439243934394439543964397439843994400440144024403440444054406440744084409441044114412441344144415441644174418441944204421442244234424442544264427442844294430443144324433443444354436443744384439444044414442444344444445444644474448444944504451445244534454445544564457445844594460446144624463446444654466446744684469447044714472447344744475447644774478447944804481448244834484448544864487448844894490449144924493449444954496449744984499450045014502450345044505450645074508450945104511451245134514451545164517451845194520452145224523452445254526452745284529453045314532453345344535453645374538453945404541454245434544454545464547454845494550455145524553455445554556455745584559456045614562456345644565456645674568456945704571457245734574457545764577457845794580458145824583458445854586458745884589459045914592459345944595459645974598459946004601460246034604460546064607460846094610461146124613461446154616461746184619462046214622462346244625462646274628462946304631463246334634463546364637463846394640464146424643464446454646464746484649465046514652465346544655465646574658465946604661466246634664466546664667466846694670467146724673467446754676467746784679468046814682468346844685468646874688468946904691469246934694469546964697469846994700470147024703470447054706470747084709471047114712471347144715471647174718471947204721472247234724472547264727472847294730473147324733473447354736473747384739474047414742474347444745474647474748474947504751475247534754475547564757475847594760476147624763476447654766476747684769477047714772477347744775
47764777477847794780478147824783478447854786478747884789479047914792479347944795479647974798479948004801480248034804480548064807480848094810481148124813481448154816481748184819482048214822482348244825482648274828482948304831483248334834483548364837483848394840484148424843484448454846484748484849485048514852485348544855485648574858485948604861486248634864486548664867486848694870487148724873487448754876487748784879488048814882488348844885488648874888488948904891489248934894489548964897489848994900490149024903490449054906490749084909491049114912491349144915491649174918491949204921492249234924492549264927492849294930493149324933493449354936493749384939494049414942494349444945494649474948494949504951495249534954495549564957495849594960496149624963496449654966496749684969497049714972497349744975497649774978497949804981498249834984498549864987498849894990499149924993499449954996499749984999500050015002500350045005500650075008500950105011501250135014501550165017501850195020502150225023502450255026502750285029503050315032503350345035503650375038503950405041504250435044504550465047504850495050505150525053505450555056505750585059506050615062506350645065506650675068506950705071507250735074507550765077507850795080508150825083508450855086508750885089509050915092509350945095509650975098509951005101510251035104510551065107510851095110511151125113511451155116511751185119512051215122512351245125512651275128512951305131513251335134513551365137513851395140514151425143514451455146514751485149515051515152515351545155515651575158515951605161516251635164516551665167516851695170517151725173517451755176517751785179518051815182518351845185518651875188518951905191519251935194519551965197519851995200520152025203520452055206520752085209521052115212521352145215521652175218521952205221522252235224522552265227522852295230523152325233523452355236523752385239524052415242524352445245524652475248524952505251525252535254525552565257525852595260526152625263526452655266526752685269527052715272527352745275
52765277527852795280528152825283528452855286528752885289529052915292529352945295529652975298529953005301530253035304530553065307530853095310531153125313531453155316531753185319532053215322532353245325532653275328532953305331533253335334533553365337533853395340534153425343534453455346534753485349535053515352535353545355535653575358535953605361536253635364536553665367536853695370537153725373537453755376537753785379538053815382538353845385538653875388538953905391539253935394539553965397539853995400540154025403540454055406540754085409541054115412541354145415541654175418541954205421542254235424542554265427542854295430543154325433543454355436543754385439544054415442544354445445544654475448544954505451545254535454545554565457545854595460546154625463546454655466546754685469547054715472547354745475547654775478547954805481548254835484548554865487548854895490549154925493549454955496549754985499550055015502550355045505550655075508550955105511551255135514551555165517551855195520552155225523552455255526552755285529553055315532553355345535553655375538553955405541554255435544554555465547554855495550555155525553555455555556555755585559556055615562556355645565556655675568556955705571557255735574557555765577557855795580558155825583558455855586558755885589559055915592559355945595559655975598559956005601560256035604560556065607560856095610561156125613561456155616561756185619562056215622562356245625562656275628562956305631563256335634563556365637563856395640564156425643564456455646564756485649565056515652565356545655565656575658565956605661566256635664566556665667566856695670567156725673567456755676567756785679568056815682568356845685568656875688568956905691569256935694569556965697569856995700570157025703570457055706570757085709571057115712571357145715571657175718571957205721572257235724572557265727572857295730573157325733573457355736573757385739574057415742574357445745574657475748574957505751575257535754575557565757575857595760576157625763576457655766576757685769577057715772577357745775
57765777577857795780578157825783578457855786578757885789579057915792579357945795579657975798579958005801580258035804580558065807580858095810581158125813581458155816581758185819582058215822582358245825582658275828582958305831583258335834583558365837583858395840584158425843584458455846584758485849585058515852585358545855585658575858585958605861586258635864586558665867586858695870587158725873587458755876587758785879588058815882588358845885588658875888588958905891589258935894589558965897589858995900590159025903590459055906590759085909591059115912591359145915591659175918591959205921592259235924592559265927592859295930593159325933593459355936593759385939594059415942594359445945594659475948594959505951595259535954595559565957595859595960596159625963596459655966596759685969597059715972597359745975597659775978597959805981598259835984598559865987598859895990599159925993599459955996599759985999600060016002600360046005600660076008600960106011601260136014601560166017601860196020602160226023602460256026602760286029603060316032603360346035603660376038603960406041604260436044604560466047604860496050605160526053605460556056605760586059606060616062606360646065606660676068606960706071607260736074607560766077607860796080608160826083608460856086608760886089609060916092609360946095609660976098609961006101610261036104610561066107610861096110611161126113611461156116611761186119612061216122612361246125612661276128612961306131613261336134613561366137613861396140614161426143614461456146614761486149615061516152615361546155615661576158615961606161616261636164616561666167616861696170617161726173617461756176617761786179618061816182618361846185618661876188618961906191619261936194619561966197619861996200620162026203620462056206620762086209621062116212621362146215621662176218621962206221622262236224622562266227622862296230623162326233623462356236623762386239624062416242624362446245624662476248624962506251625262536254625562566257625862596260626162626263626462656266626762686269627062716272627362746275
62766277627862796280628162826283628462856286628762886289629062916292629362946295629662976298629963006301630263036304630563066307630863096310631163126313631463156316631763186319632063216322632363246325632663276328632963306331633263336334633563366337633863396340634163426343634463456346634763486349635063516352635363546355635663576358635963606361636263636364636563666367636863696370637163726373637463756376637763786379638063816382638363846385638663876388638963906391639263936394639563966397639863996400640164026403640464056406640764086409641064116412641364146415641664176418641964206421642264236424642564266427642864296430643164326433643464356436643764386439644064416442644364446445644664476448644964506451645264536454645564566457645864596460646164626463646464656466646764686469647064716472647364746475647664776478647964806481648264836484648564866487648864896490649164926493649464956496649764986499650065016502650365046505650665076508650965106511651265136514651565166517651865196520652165226523652465256526652765286529653065316532653365346535653665376538653965406541654265436544654565466547654865496550655165526553655465556556655765586559656065616562656365646565656665676568656965706571657265736574657565766577657865796580658165826583658465856586658765886589659065916592659365946595659665976598659966006601660266036604660566066607660866096610661166126613661466156616661766186619662066216622662366246625662666276628662966306631663266336634663566366637663866396640664166426643664466456646664766486649665066516652665366546655665666576658665966606661666266636664666566666667666866696670667166726673667466756676667766786679668066816682668366846685668666876688668966906691669266936694669566966697669866996700670167026703670467056706670767086709671067116712671367146715671667176718671967206721672267236724672567266727672867296730673167326733673467356736673767386739674067416742674367446745674667476748674967506751675267536754675567566757675867596760676167626763676467656766676767686769677067716772677367746775
67766777677867796780678167826783678467856786678767886789679067916792679367946795679667976798679968006801680268036804680568066807680868096810681168126813681468156816681768186819682068216822682368246825682668276828682968306831683268336834683568366837683868396840684168426843684468456846684768486849685068516852685368546855685668576858685968606861686268636864686568666867686868696870687168726873687468756876687768786879688068816882688368846885688668876888688968906891689268936894689568966897689868996900690169026903690469056906690769086909691069116912691369146915691669176918691969206921692269236924692569266927692869296930693169326933693469356936693769386939694069416942694369446945694669476948694969506951695269536954695569566957695869596960696169626963696469656966696769686969697069716972697369746975697669776978697969806981698269836984698569866987698869896990699169926993699469956996699769986999700070017002700370047005700670077008700970107011701270137014701570167017701870197020702170227023702470257026702770287029703070317032703370347035703670377038703970407041704270437044704570467047704870497050705170527053705470557056705770587059706070617062706370647065706670677068706970707071707270737074707570767077707870797080708170827083708470857086708770887089709070917092709370947095709670977098709971007101710271037104710571067107710871097110711171127113711471157116711771187119712071217122712371247125712671277128712971307131713271337134713571367137713871397140714171427143714471457146714771487149715071517152715371547155715671577158715971607161716271637164716571667167716871697170717171727173717471757176717771787179718071817182718371847185718671877188718971907191719271937194719571967197719871997200720172027203720472057206720772087209721072117212721372147215721672177218721972207221722272237224722572267227722872297230723172327233723472357236723772387239724072417242724372447245724672477248724972507251725272537254725572567257725872597260726172627263726472657266726772687269727072717272727372747275
72767277727872797280728172827283728472857286728772887289729072917292729372947295729672977298729973007301730273037304730573067307730873097310731173127313731473157316731773187319732073217322732373247325732673277328732973307331733273337334733573367337733873397340734173427343734473457346734773487349735073517352735373547355735673577358735973607361736273637364736573667367736873697370737173727373737473757376737773787379738073817382738373847385738673877388738973907391739273937394739573967397739873997400740174027403740474057406740774087409741074117412741374147415741674177418741974207421742274237424742574267427742874297430743174327433743474357436743774387439744074417442744374447445744674477448744974507451745274537454745574567457745874597460746174627463746474657466746774687469747074717472747374747475747674777478747974807481748274837484748574867487748874897490749174927493749474957496749774987499750075017502750375047505750675077508750975107511751275137514751575167517751875197520752175227523752475257526752775287529753075317532753375347535753675377538753975407541754275437544754575467547754875497550755175527553755475557556755775587559756075617562756375647565756675677568756975707571757275737574757575767577757875797580758175827583758475857586758775887589759075917592759375947595759675977598759976007601760276037604760576067607760876097610761176127613761476157616761776187619762076217622762376247625762676277628762976307631763276337634763576367637763876397640764176427643764476457646764776487649765076517652765376547655765676577658765976607661766276637664766576667667766876697670767176727673767476757676767776787679768076817682768376847685768676877688768976907691769276937694769576967697769876997700770177027703770477057706770777087709771077117712771377147715771677177718771977207721772277237724772577267727772877297730773177327733773477357736773777387739774077417742774377447745774677477748774977507751775277537754775577567757775877597760776177627763776477657766776777687769777077717772777377747775
77767777777877797780778177827783778477857786778777887789779077917792779377947795779677977798779978007801780278037804780578067807780878097810781178127813781478157816781778187819782078217822782378247825782678277828782978307831783278337834783578367837783878397840784178427843784478457846784778487849785078517852785378547855785678577858785978607861786278637864786578667867786878697870787178727873787478757876787778787879788078817882788378847885788678877888788978907891789278937894789578967897789878997900790179027903790479057906790779087909791079117912791379147915791679177918791979207921792279237924792579267927792879297930793179327933793479357936793779387939794079417942794379447945794679477948794979507951795279537954795579567957795879597960796179627963796479657966796779687969797079717972797379747975797679777978797979807981798279837984798579867987798879897990799179927993799479957996799779987999800080018002800380048005800680078008800980108011801280138014801580168017801880198020802180228023802480258026802780288029803080318032803380348035803680378038803980408041804280438044804580468047804880498050805180528053805480558056805780588059806080618062806380648065806680678068806980708071807280738074807580768077807880798080808180828083808480858086808780888089809080918092809380948095809680978098809981008101810281038104810581068107810881098110811181128113811481158116811781188119812081218122812381248125812681278128812981308131813281338134813581368137813881398140814181428143814481458146814781488149815081518152815381548155815681578158815981608161816281638164816581668167816881698170817181728173817481758176817781788179818081818182818381848185818681878188818981908191819281938194819581968197819881998200820182028203820482058206820782088209821082118212821382148215821682178218821982208221822282238224822582268227822882298230823182328233823482358236823782388239824082418242824382448245824682478248824982508251825282538254825582568257825882598260826182628263826482658266826782688269827082718272827382748275
82768277827882798280828182828283828482858286828782888289829082918292829382948295829682978298829983008301830283038304830583068307830883098310831183128313831483158316831783188319832083218322832383248325832683278328832983308331833283338334833583368337833883398340834183428343834483458346834783488349835083518352835383548355835683578358835983608361836283638364836583668367836883698370837183728373837483758376837783788379838083818382838383848385838683878388838983908391839283938394839583968397839883998400840184028403840484058406840784088409841084118412841384148415841684178418841984208421842284238424842584268427842884298430843184328433843484358436843784388439844084418442844384448445844684478448844984508451845284538454845584568457845884598460846184628463846484658466846784688469847084718472847384748475847684778478847984808481848284838484848584868487848884898490849184928493849484958496849784988499850085018502850385048505850685078508850985108511851285138514851585168517851885198520852185228523852485258526852785288529853085318532853385348535853685378538853985408541854285438544854585468547854885498550855185528553855485558556855785588559856085618562856385648565856685678568856985708571857285738574857585768577857885798580858185828583858485858586858785888589859085918592859385948595859685978598859986008601860286038604860586068607860886098610861186128613861486158616861786188619862086218622862386248625862686278628862986308631863286338634863586368637863886398640864186428643864486458646864786488649865086518652865386548655865686578658865986608661866286638664866586668667866886698670867186728673867486758676867786788679868086818682868386848685868686878688868986908691869286938694869586968697869886998700870187028703870487058706870787088709871087118712871387148715871687178718871987208721872287238724872587268727872887298730873187328733873487358736873787388739874087418742874387448745874687478748874987508751875287538754875587568757875887598760876187628763876487658766876787688769877087718772877387748775
87768777877887798780878187828783878487858786878787888789879087918792879387948795879687978798879988008801880288038804880588068807880888098810881188128813881488158816881788188819882088218822882388248825882688278828882988308831883288338834883588368837883888398840884188428843884488458846884788488849885088518852885388548855885688578858885988608861886288638864886588668867886888698870887188728873887488758876887788788879888088818882888388848885888688878888888988908891889288938894889588968897889888998900890189028903890489058906890789088909891089118912891389148915891689178918891989208921892289238924892589268927892889298930893189328933893489358936893789388939894089418942894389448945894689478948894989508951895289538954895589568957895889598960896189628963896489658966896789688969897089718972897389748975897689778978897989808981898289838984898589868987898889898990899189928993899489958996899789988999900090019002900390049005900690079008900990109011901290139014901590169017901890199020902190229023902490259026902790289029903090319032903390349035903690379038903990409041904290439044904590469047904890499050905190529053905490559056905790589059906090619062906390649065906690679068906990709071907290739074907590769077907890799080908190829083908490859086908790889089909090919092909390949095909690979098909991009101910291039104910591069107910891099110911191129113911491159116911791189119912091219122912391249125912691279128912991309131913291339134913591369137913891399140914191429143914491459146914791489149915091519152915391549155915691579158915991609161916291639164916591669167916891699170917191729173917491759176917791789179918091819182918391849185918691879188918991909191919291939194919591969197919891999200920192029203920492059206920792089209921092119212921392149215921692179218921992209221922292239224922592269227922892299230923192329233923492359236923792389239924092419242924392449245924692479248924992509251925292539254925592569257925892599260926192629263926492659266926792689269927092719272927392749275
92769277927892799280928192829283928492859286928792889289929092919292929392949295929692979298929993009301930293039304930593069307930893099310931193129313931493159316931793189319932093219322932393249325932693279328932993309331933293339334933593369337933893399340934193429343934493459346934793489349935093519352935393549355935693579358935993609361936293639364936593669367936893699370937193729373937493759376937793789379938093819382938393849385938693879388938993909391939293939394939593969397939893999400940194029403940494059406940794089409941094119412941394149415941694179418941994209421942294239424942594269427942894299430943194329433943494359436943794389439944094419442944394449445944694479448944994509451945294539454945594569457945894599460946194629463946494659466946794689469947094719472947394749475947694779478947994809481948294839484948594869487948894899490949194929493949494959496949794989499950095019502950395049505950695079508950995109511951295139514951595169517951895199520952195229523952495259526952795289529953095319532953395349535953695379538953995409541954295439544954595469547954895499550955195529553955495559556955795589559956095619562956395649565956695679568956995709571957295739574957595769577957895799580958195829583958495859586958795889589959095919592959395949595959695979598959996009601960296039604960596069607960896099610961196129613961496159616961796189619962096219622962396249625962696279628962996309631963296339634963596369637963896399640964196429643964496459646964796489649965096519652965396549655965696579658965996609661966296639664966596669667966896699670967196729673967496759676967796789679968096819682968396849685968696879688968996909691969296939694969596969697969896999700970197029703970497059706970797089709971097119712971397149715971697179718971997209721972297239724972597269727972897299730973197329733973497359736973797389739974097419742974397449745974697479748974997509751975297539754975597569757975897599760976197629763976497659766976797689769977097719772977397749775
97769777977897799780978197829783978497859786978797889789979097919792979397949795979697979798979998009801980298039804980598069807980898099810981198129813981498159816981798189819982098219822982398249825982698279828982998309831983298339834983598369837983898399840984198429843984498459846984798489849985098519852985398549855985698579858985998609861986298639864986598669867986898699870987198729873987498759876987798789879988098819882988398849885988698879888988998909891989298939894989598969897989898999900990199029903990499059906990799089909991099119912991399149915991699179918991999209921992299239924992599269927992899299930993199329933993499359936993799389939994099419942994399449945994699479948994999509951995299539954995599569957995899599960996199629963996499659966996799689969997099719972997399749975997699779978997999809981998299839984998599869987998899899990999199929993999499959996999799989999100001000110002100031000410005100061000710008100091001010011100121001310014100151001610017100181001910020100211002210023100241002510026100271002810029100301003110032100331003410035100361003710038100391004010041100421004310044100451004610047100481004910050100511005210053100541005510056100571005810059100601006110062100631006410065100661006710068100691007010071100721007310074100751007610077100781007910080100811008210083100841008510086100871008810089100901009110092100931009410095100961009710098100991010010101101021010310104101051010610107101081010910110101111011210113101141011510116101171011810119101201012110122101231012410125101261012710128101291013010131101321013310134101351013610137101381013910140101411014210143101441014510146101471014810149101501015110152101531015410155101561015710158101591016010161101621016310164101651016610167101681016910170101711017210173101741017510176101771017810179101801018110182101831018410185101861018710188101891019010191101921019310194101951019610197101981019910200102011020210203102041020510206102071020810209102101021110212102131021410215102161021710218102191022
01022110222102231022410225102261022710228102291023010231102321023310234102351023610237102381023910240102411024210243102441024510246102471024810249102501025110252102531025410255102561025710258102591026010261102621026310264102651026610267102681026910270102711027210273102741027510276102771027810279102801028110282102831028410285102861028710288102891029010291102921029310294102951029610297102981029910300103011030210303103041030510306103071030810309103101031110312103131031410315103161031710318103191032010321103221032310324103251032610327103281032910330103311033210333103341033510336103371033810339103401034110342103431034410345103461034710348103491035010351103521035310354103551035610357103581035910360103611036210363103641036510366103671036810369103701037110372103731037410375103761037710378103791038010381103821038310384103851038610387103881038910390103911039210393103941039510396103971039810399104001040110402104031040410405104061040710408104091041010411104121041310414104151041610417104181041910420104211042210423104241042510426104271042810429104301043110432104331043410435104361043710438104391044010441104421044310444104451044610447104481044910450104511045210453104541045510456104571045810459104601046110462104631046410465104661046710468104691047010471104721047310474104751047610477104781047910480104811048210483104841048510486104871048810489104901049110492104931049410495104961049710498104991050010501105021050310504105051050610507105081050910510105111051210513105141051510516105171051810519105201052110522105231052410525105261052710528105291053010531105321053310534105351053610537105381053910540105411054210543105441054510546105471054810549105501055110552105531055410555105561055710558105591056010561105621056310564105651056610567105681056910570105711057210573105741057510576105771057810579105801058110582105831058410585105861058710588105891059010591105921059310594105951059610597105981059910600106011060210603106041060510606106071060810609106101061110612106131061410615106161061710618106191062
01062110622106231062410625106261062710628106291063010631106321063310634106351063610637106381063910640106411064210643106441064510646106471064810649106501065110652106531065410655106561065710658106591066010661106621066310664106651066610667106681066910670106711067210673106741067510676106771067810679106801068110682106831068410685106861068710688106891069010691106921069310694106951069610697106981069910700107011070210703107041070510706107071070810709107101071110712107131071410715107161071710718107191072010721107221072310724107251072610727107281072910730107311073210733107341073510736107371073810739107401074110742107431074410745107461074710748107491075010751107521075310754107551075610757107581075910760107611076210763107641076510766107671076810769107701077110772107731077410775107761077710778107791078010781107821078310784107851078610787107881078910790107911079210793107941079510796107971079810799108001080110802108031080410805108061080710808108091081010811108121081310814108151081610817108181081910820108211082210823108241082510826108271082810829108301083110832108331083410835108361083710838108391084010841108421084310844108451084610847108481084910850108511085210853108541085510856108571085810859108601086110862108631086410865108661086710868108691087010871108721087310874108751087610877108781087910880108811088210883108841088510886108871088810889108901089110892108931089410895108961089710898108991090010901109021090310904109051090610907109081090910910109111091210913109141091510916109171091810919109201092110922109231092410925109261092710928109291093010931109321093310934109351093610937109381093910940109411094210943109441094510946109471094810949109501095110952109531095410955109561095710958109591096010961109621096310964109651096610967109681096910970109711097210973109741097510976109771097810979109801098110982109831098410985109861098710988109891099010991109921099310994109951099610997109981099911000110011100211003110041100511006110071100811009110101101111012110131101411015110161101711018110191102
01102111022110231102411025110261102711028110291103011031110321103311034110351103611037110381103911040110411104211043110441104511046110471104811049110501105111052110531105411055110561105711058110591106011061110621106311064110651106611067110681106911070110711107211073110741107511076110771107811079110801108111082110831108411085110861108711088110891109011091110921109311094110951109611097110981109911100111011110211103111041110511106111071110811109111101111111112111131111411115111161111711118111191112011121111221112311124111251112611127111281112911130111311113211133111341113511136111371113811139111401114111142111431114411145111461114711148111491115011151111521115311154111551115611157111581115911160111611116211163111641116511166111671116811169111701117111172111731117411175111761117711178111791118011181111821118311184111851118611187111881118911190111911119211193111941119511196111971119811199112001120111202112031120411205112061120711208112091121011211112121121311214112151121611217112181121911220112211122211223112241122511226112271122811229112301123111232112331123411235112361123711238112391124011241112421124311244112451124611247112481124911250112511125211253112541125511256112571125811259112601126111262112631126411265112661126711268112691127011271112721127311274112751127611277112781127911280112811128211283112841128511286112871128811289112901129111292112931129411295112961129711298112991130011301113021130311304113051130611307113081130911310113111131211313113141131511316113171131811319113201132111322113231132411325113261132711328113291133011331113321133311334113351133611337113381133911340113411134211343113441134511346113471134811349113501135111352113531135411355113561135711358113591136011361113621136311364113651136611367113681136911370113711137211373113741137511376113771137811379113801138111382113831138411385113861138711388113891139011391113921139311394113951139611397113981139911400114011140211403114041140511406114071140811409114101141111412114131141411415114161141711418114191142
01142111422114231142411425114261142711428114291143011431114321143311434114351143611437114381143911440114411144211443114441144511446114471144811449114501145111452114531145411455114561145711458114591146011461114621146311464114651146611467114681146911470114711147211473114741147511476114771147811479114801148111482114831148411485114861148711488114891149011491114921149311494114951149611497114981149911500115011150211503115041150511506115071150811509115101151111512115131151411515115161151711518115191152011521115221152311524115251152611527115281152911530115311153211533115341153511536115371153811539115401154111542115431154411545115461154711548115491155011551115521155311554115551155611557115581155911560115611156211563115641156511566115671156811569115701157111572115731157411575115761157711578115791158011581115821158311584115851158611587115881158911590115911159211593115941159511596115971159811599116001160111602116031160411605116061160711608116091161011611116121161311614116151161611617116181161911620116211162211623116241162511626116271162811629116301163111632116331163411635116361163711638116391164011641116421164311644116451164611647116481164911650116511165211653116541165511656116571165811659116601166111662116631166411665116661166711668116691167011671116721167311674116751167611677116781167911680116811168211683116841168511686116871168811689116901169111692116931169411695116961169711698116991170011701117021170311704117051170611707117081170911710117111171211713117141171511716117171171811719117201172111722117231172411725117261172711728117291173011731117321173311734117351173611737117381173911740117411174211743117441174511746117471174811749117501175111752117531175411755117561175711758117591176011761117621176311764117651176611767117681176911770117711177211773117741177511776117771177811779117801178111782117831178411785117861178711788117891179011791117921179311794117951179611797117981179911800118011180211803118041180511806118071180811809118101181111812118131181411815118161181711818118191182
01182111822118231182411825118261182711828118291183011831118321183311834118351183611837118381183911840118411184211843118441184511846118471184811849118501185111852118531185411855118561185711858118591186011861118621186311864118651186611867118681186911870118711187211873118741187511876118771187811879118801188111882118831188411885118861188711888118891189011891118921189311894118951189611897118981189911900119011190211903119041190511906119071190811909119101191111912119131191411915119161191711918119191192011921119221192311924119251192611927119281192911930119311193211933119341193511936119371193811939119401194111942119431194411945119461194711948119491195011951119521195311954119551195611957119581195911960119611196211963119641196511966119671196811969119701197111972119731197411975119761197711978119791198011981119821198311984119851198611987119881198911990119911199211993119941199511996119971199811999120001200112002120031200412005120061200712008120091201012011120121201312014120151201612017120181201912020120211202212023120241202512026120271202812029120301203112032120331203412035120361203712038120391204012041120421204312044120451204612047120481204912050120511205212053120541205512056120571205812059120601206112062120631206412065120661206712068120691207012071120721207312074120751207612077120781207912080120811208212083120841208512086120871208812089120901209112092120931209412095120961209712098120991210012101121021210312104121051210612107121081210912110121111211212113121141211512116121171211812119121201212112122121231212412125121261212712128121291213012131121321213312134121351213612137121381213912140121411214212143121441214512146121471214812149121501215112152121531215412155121561215712158121591216012161121621216312164121651216612167121681216912170121711217212173121741217512176121771217812179121801218112182121831218412185121861218712188121891219012191121921219312194121951219612197121981219912200122011220212203122041220512206122071220812209122101221112212122131221412215122161221712218122191222
01222112222122231222412225122261222712228122291223012231122321223312234122351223612237122381223912240122411224212243122441224512246122471224812249122501225112252122531225412255122561225712258122591226012261122621226312264122651226612267122681226912270122711227212273122741227512276122771227812279122801228112282122831228412285122861228712288122891229012291122921229312294122951229612297122981229912300123011230212303123041230512306123071230812309123101231112312123131231412315123161231712318123191232012321123221232312324123251232612327123281232912330123311233212333123341233512336123371233812339123401234112342123431234412345123461234712348123491235012351123521235312354123551235612357123581235912360123611236212363123641236512366123671236812369123701237112372123731237412375123761237712378123791238012381123821238312384123851238612387123881238912390123911239212393123941239512396123971239812399124001240112402124031240412405124061240712408124091241012411124121241312414124151241612417124181241912420124211242212423124241242512426124271242812429124301243112432124331243412435124361243712438124391244012441124421244312444124451244612447124481244912450124511245212453124541245512456124571245812459124601246112462124631246412465124661246712468124691247012471124721247312474124751247612477124781247912480124811248212483124841248512486124871248812489124901249112492124931249412495124961249712498124991250012501125021250312504125051250612507125081250912510125111251212513125141251512516125171251812519125201252112522125231252412525125261252712528125291253012531125321253312534125351253612537125381253912540125411254212543125441254512546125471254812549125501255112552125531255412555125561255712558125591256012561125621256312564125651256612567125681256912570125711257212573125741257512576125771257812579125801258112582125831258412585125861258712588125891259012591125921259312594125951259612597125981259912600126011260212603126041260512606126071260812609126101261112612126131261412615126161261712618126191262
01262112622126231262412625126261262712628126291263012631126321263312634126351263612637126381263912640126411264212643126441264512646126471264812649126501265112652126531265412655126561265712658126591266012661126621266312664126651266612667126681266912670126711267212673126741267512676126771267812679126801268112682126831268412685126861268712688126891269012691126921269312694126951269612697126981269912700127011270212703127041270512706127071270812709127101271112712127131271412715127161271712718127191272012721127221272312724127251272612727127281272912730127311273212733127341273512736127371273812739127401274112742127431274412745127461274712748127491275012751127521275312754127551275612757127581275912760127611276212763127641276512766127671276812769127701277112772127731277412775127761277712778127791278012781127821278312784127851278612787127881278912790127911279212793127941279512796127971279812799128001280112802128031280412805128061280712808128091281012811128121281312814128151281612817128181281912820128211282212823128241282512826128271282812829128301283112832128331283412835128361283712838128391284012841128421284312844128451284612847128481284912850128511285212853128541285512856128571285812859128601286112862128631286412865128661286712868128691287012871128721287312874128751287612877128781287912880128811288212883128841288512886128871288812889128901289112892128931289412895128961289712898128991290012901129021290312904129051290612907129081290912910129111291212913129141291512916129171291812919129201292112922129231292412925129261292712928129291293012931129321293312934129351293612937129381293912940129411294212943129441294512946129471294812949129501295112952129531295412955129561295712958129591296012961129621296312964129651296612967129681296912970129711297212973129741297512976129771297812979129801298112982129831298412985129861298712988129891299012991129921299312994129951299612997129981299913000130011300213003130041300513006130071300813009130101301113012130131301413015130161301713018130191302
01302113022130231302413025130261302713028130291303013031130321303313034130351303613037130381303913040130411304213043130441304513046130471304813049130501305113052130531305413055130561305713058130591306013061130621306313064130651306613067130681306913070130711307213073130741307513076130771307813079130801308113082130831308413085130861308713088130891309013091130921309313094130951309613097130981309913100131011310213103131041310513106131071310813109131101311113112131131311413115131161311713118131191312013121131221312313124131251312613127131281312913130131311313213133131341313513136131371313813139131401314113142131431314413145131461314713148131491315013151131521315313154131551315613157131581315913160131611316213163131641316513166131671316813169131701317113172131731317413175131761317713178131791318013181131821318313184131851318613187131881318913190131911319213193131941319513196131971319813199132001320113202132031320413205132061320713208132091321013211132121321313214132151321613217132181321913220132211322213223132241322513226132271322813229132301323113232132331323413235132361323713238132391324013241132421324313244132451324613247132481324913250132511325213253132541325513256132571325813259132601326113262132631326413265132661326713268132691327013271132721327313274132751327613277132781327913280132811328213283132841328513286132871328813289132901329113292132931329413295132961329713298132991330013301133021330313304133051330613307133081330913310133111331213313133141331513316133171331813319133201332113322133231332413325133261332713328133291333013331133321333313334133351333613337133381333913340133411334213343133441334513346133471334813349133501335113352133531335413355133561335713358133591336013361133621336313364133651336613367133681336913370133711337213373133741337513376133771337813379133801338113382133831338413385133861338713388133891339013391133921339313394133951339613397133981339913400134011340213403134041340513406134071340813409134101341113412134131341413415134161341713418134191342
01342113422134231342413425134261342713428134291343013431134321343313434134351343613437134381343913440134411344213443134441344513446134471344813449134501345113452134531345413455134561345713458134591346013461134621346313464134651346613467134681346913470134711347213473134741347513476134771347813479134801348113482134831348413485134861348713488134891349013491134921349313494134951349613497134981349913500135011350213503135041350513506135071350813509135101351113512135131351413515135161351713518135191352013521135221352313524135251352613527135281352913530135311353213533135341353513536135371353813539135401354113542135431354413545135461354713548135491355013551135521355313554135551355613557135581355913560135611356213563135641356513566135671356813569135701357113572135731357413575135761357713578135791358013581135821358313584135851358613587135881358913590135911359213593135941359513596135971359813599136001360113602136031360413605136061360713608136091361013611136121361313614136151361613617136181361913620136211362213623136241362513626136271362813629136301363113632136331363413635136361363713638136391364013641136421364313644136451364613647136481364913650136511365213653136541365513656136571365813659136601366113662136631366413665136661366713668136691367013671136721367313674136751367613677136781367913680136811368213683136841368513686136871368813689136901369113692136931369413695136961369713698136991370013701137021370313704137051370613707137081370913710137111371213713137141371513716137171371813719137201372113722137231372413725137261372713728137291373013731137321373313734137351373613737137381373913740137411374213743137441374513746137471374813749137501375113752137531375413755137561375713758137591376013761137621376313764137651376613767137681376913770137711377213773137741377513776137771377813779137801378113782137831378413785137861378713788137891379013791137921379313794137951379613797137981379913800138011380213803138041380513806138071380813809138101381113812138131381413815138161381713818138191382
01382113822138231382413825138261382713828138291383013831138321383313834138351383613837138381383913840138411384213843138441384513846138471384813849138501385113852138531385413855138561385713858138591386013861138621386313864138651386613867138681386913870138711387213873138741387513876138771387813879138801388113882138831388413885138861388713888138891389013891138921389313894138951389613897138981389913900139011390213903139041390513906139071390813909139101391113912139131391413915139161391713918139191392013921139221392313924139251392613927139281392913930139311393213933139341393513936139371393813939139401394113942139431394413945139461394713948139491395013951139521395313954139551395613957139581395913960139611396213963139641396513966139671396813969139701397113972139731397413975139761397713978139791398013981139821398313984139851398613987139881398913990139911399213993139941399513996139971399813999140001400114002140031400414005140061400714008140091401014011140121401314014140151401614017140181401914020140211402214023140241402514026140271402814029140301403114032140331403414035140361403714038140391404014041140421404314044140451404614047140481404914050140511405214053140541405514056140571405814059140601406114062140631406414065140661406714068140691407014071140721407314074140751407614077140781407914080140811408214083140841408514086140871408814089140901409114092140931409414095140961409714098140991410014101141021410314104141051410614107141081410914110141111411214113141141411514116141171411814119141201412114122141231412414125141261412714128141291413014131141321413314134141351413614137141381413914140141411414214143141441414514146141471414814149141501415114152141531415414155141561415714158141591416014161141621416314164141651416614167141681416914170141711417214173141741417514176141771417814179141801418114182141831418414185141861418714188141891419014191141921419314194141951419614197141981419914200142011420214203142041420514206142071420814209142101421114212142131421414215142161421714218142191422
01422114222142231422414225142261422714228142291423014231142321423314234142351423614237142381423914240142411424214243142441424514246142471424814249142501425114252142531425414255142561425714258142591426014261142621426314264142651426614267142681426914270142711427214273142741427514276142771427814279142801428114282142831428414285142861428714288142891429014291142921429314294142951429614297142981429914300143011430214303143041430514306143071430814309143101431114312143131431414315143161431714318143191432014321143221432314324143251432614327143281432914330143311433214333143341433514336143371433814339143401434114342143431434414345143461434714348143491435014351143521435314354143551435614357143581435914360143611436214363143641436514366143671436814369143701437114372143731437414375143761437714378143791438014381143821438314384143851438614387143881438914390143911439214393143941439514396143971439814399144001440114402144031440414405144061440714408144091441014411144121441314414144151441614417144181441914420144211442214423144241442514426144271442814429144301443114432144331443414435144361443714438144391444014441144421444314444144451444614447144481444914450144511445214453144541445514456144571445814459144601446114462144631446414465144661446714468144691447014471144721447314474144751447614477144781447914480144811448214483144841448514486144871448814489144901449114492144931449414495144961449714498144991450014501145021450314504145051450614507145081450914510145111451214513145141451514516145171451814519145201452114522145231452414525145261452714528145291453014531145321453314534145351453614537145381453914540145411454214543145441454514546145471454814549145501455114552145531455414555145561455714558145591456014561145621456314564145651456614567145681456914570145711457214573145741457514576145771457814579145801458114582145831458414585145861458714588145891459014591145921459314594145951459614597145981459914600146011460214603146041460514606146071460814609146101461114612146131461414615146161461714618146191462
01462114622146231462414625146261462714628146291463014631146321463314634146351463614637146381463914640146411464214643146441464514646146471464814649146501465114652146531465414655146561465714658146591466014661146621466314664146651466614667146681466914670146711467214673146741467514676146771467814679146801468114682146831468414685146861468714688146891469014691146921469314694146951469614697146981469914700147011470214703147041470514706147071470814709147101471114712147131471414715147161471714718147191472014721147221472314724147251472614727147281472914730147311473214733147341473514736147371473814739147401474114742147431474414745147461474714748147491475014751147521475314754147551475614757147581475914760147611476214763147641476514766147671476814769147701477114772147731477414775147761477714778147791478014781147821478314784147851478614787147881478914790147911479214793147941479514796147971479814799148001480114802148031480414805148061480714808148091481014811148121481314814148151481614817148181481914820148211482214823148241482514826148271482814829148301483114832148331483414835148361483714838148391484014841148421484314844148451484614847148481484914850148511485214853148541485514856148571485814859148601486114862148631486414865148661486714868148691487014871148721487314874148751487614877148781487914880148811488214883148841488514886148871488814889148901489114892148931489414895148961489714898148991490014901149021490314904149051490614907149081490914910149111491214913149141491514916149171491814919149201492114922149231492414925149261492714928149291493014931149321493314934149351493614937149381493914940149411494214943149441494514946149471494814949149501495114952149531495414955149561495714958149591496014961149621496314964149651496614967149681496914970149711497214973149741497514976149771497814979149801498114982149831498414985149861498714988149891499014991149921499314994149951499614997149981499915000150011500215003150041500515006150071500815009150101501115012150131501415015150161501715018150191502
01502115022150231502415025150261502715028150291503015031150321503315034150351503615037150381503915040150411504215043150441504515046150471504815049150501505115052150531505415055150561505715058150591506015061150621506315064150651506615067150681506915070150711507215073150741507515076150771507815079150801508115082150831508415085150861508715088150891509015091150921509315094150951509615097150981509915100151011510215103151041510515106151071510815109151101511115112151131511415115151161511715118151191512015121151221512315124151251512615127151281512915130151311513215133151341513515136151371513815139151401514115142151431514415145151461514715148151491515015151151521515315154151551515615157151581515915160151611516215163151641516515166151671516815169151701517115172151731517415175151761517715178151791518015181151821518315184151851518615187151881518915190151911519215193151941519515196151971519815199152001520115202152031520415205152061520715208152091521015211152121521315214152151521615217152181521915220152211522215223152241522515226152271522815229152301523115232152331523415235152361523715238152391524015241152421524315244152451524615247152481524915250152511525215253152541525515256152571525815259152601526115262152631526415265152661526715268152691527015271152721527315274152751527615277152781527915280152811528215283152841528515286152871528815289152901529115292152931529415295152961529715298152991530015301153021530315304153051530615307153081530915310153111531215313153141531515316153171531815319153201532115322153231532415325153261532715328153291533015331153321533315334153351533615337153381533915340153411534215343153441534515346153471534815349153501535115352153531535415355153561535715358153591536015361153621536315364153651536615367153681536915370153711537215373153741537515376153771537815379153801538115382153831538415385153861538715388153891539015391153921539315394153951539615397153981539915400154011540215403154041540515406154071540815409154101541115412154131541415415154161541715418154191542
01542115422154231542415425154261542715428154291543015431154321543315434154351543615437154381543915440154411544215443154441544515446154471544815449154501545115452154531545415455154561545715458154591546015461154621546315464154651546615467154681546915470154711547215473154741547515476154771547815479154801548115482154831548415485154861548715488154891549015491154921549315494154951549615497154981549915500155011550215503155041550515506155071550815509155101551115512155131551415515155161551715518155191552015521155221552315524155251552615527155281552915530155311553215533155341553515536155371553815539155401554115542155431554415545155461554715548155491555015551155521555315554155551555615557155581555915560155611556215563155641556515566155671556815569155701557115572155731557415575155761557715578155791558015581155821558315584155851558615587155881558915590155911559215593155941559515596155971559815599156001560115602156031560415605156061560715608156091561015611156121561315614156151561615617156181561915620156211562215623156241562515626156271562815629156301563115632156331563415635156361563715638156391564015641156421564315644156451564615647156481564915650156511565215653156541565515656156571565815659156601566115662156631566415665156661566715668156691567015671156721567315674156751567615677156781567915680156811568215683156841568515686156871568815689156901569115692156931569415695156961569715698156991570015701157021570315704157051570615707157081570915710157111571215713157141571515716157171571815719157201572115722157231572415725157261572715728157291573015731157321573315734157351573615737157381573915740157411574215743157441574515746157471574815749157501575115752157531575415755157561575715758157591576015761157621576315764157651576615767157681576915770157711577215773157741577515776157771577815779157801578115782157831578415785157861578715788157891579015791157921579315794157951579615797157981579915800158011580215803158041580515806158071580815809158101581115812158131581415815158161581715818158191582
01582115822158231582415825158261582715828158291583015831158321583315834158351583615837158381583915840158411584215843158441584515846158471584815849158501585115852158531585415855158561585715858158591586015861158621586315864158651586615867158681586915870158711587215873158741587515876158771587815879158801588115882158831588415885158861588715888158891589015891158921589315894158951589615897158981589915900159011590215903159041590515906159071590815909159101591115912159131591415915159161591715918159191592015921159221592315924159251592615927159281592915930159311593215933159341593515936159371593815939159401594115942159431594415945159461594715948159491595015951159521595315954159551595615957159581595915960159611596215963159641596515966159671596815969159701597115972159731597415975159761597715978159791598015981159821598315984159851598615987159881598915990159911599215993159941599515996159971599815999160001600116002160031600416005160061600716008160091601016011160121601316014160151601616017160181601916020160211602216023160241602516026160271602816029160301603116032160331603416035160361603716038160391604016041160421604316044160451604616047160481604916050160511605216053160541605516056160571605816059160601606116062160631606416065160661606716068160691607016071160721607316074160751607616077160781607916080160811608216083160841608516086160871608816089160901609116092160931609416095160961609716098160991610016101161021610316104161051610616107161081610916110161111611216113161141611516116161171611816119161201612116122161231612416125161261612716128161291613016131161321613316134161351613616137161381613916140161411614216143161441614516146161471614816149161501615116152161531615416155161561615716158161591616016161161621616316164161651616616167161681616916170161711617216173161741617516176161771617816179161801618116182161831618416185161861618716188161891619016191161921619316194161951619616197161981619916200162011620216203162041620516206162071620816209162101621116212162131621416215162161621716218162191622
01622116222162231622416225162261622716228162291623016231162321623316234162351623616237162381623916240162411624216243162441624516246162471624816249162501625116252162531625416255162561625716258162591626016261162621626316264162651626616267162681626916270162711627216273162741627516276162771627816279162801628116282162831628416285162861628716288162891629016291162921629316294162951629616297162981629916300163011630216303163041630516306163071630816309163101631116312163131631416315163161631716318163191632016321163221632316324163251632616327163281632916330163311633216333163341633516336163371633816339163401634116342163431634416345163461634716348163491635016351163521635316354163551635616357163581635916360163611636216363163641636516366163671636816369163701637116372163731637416375163761637716378163791638016381163821638316384163851638616387163881638916390163911639216393163941639516396163971639816399164001640116402164031640416405164061640716408164091641016411164121641316414164151641616417164181641916420164211642216423164241642516426164271642816429164301643116432164331643416435164361643716438164391644016441164421644316444164451644616447164481644916450164511645216453164541645516456164571645816459164601646116462164631646416465164661646716468164691647016471164721647316474164751647616477164781647916480164811648216483164841648516486164871648816489164901649116492164931649416495164961649716498164991650016501165021650316504165051650616507165081650916510165111651216513165141651516516165171651816519165201652116522165231652416525165261652716528165291653016531165321653316534165351653616537165381653916540165411654216543165441654516546165471654816549165501655116552165531655416555165561655716558165591656016561165621656316564165651656616567165681656916570165711657216573165741657516576165771657816579165801658116582165831658416585165861658716588165891659016591165921659316594165951659616597165981659916600166011660216603166041660516606166071660816609166101661116612166131661416615166161661716618166191662
01662116622166231662416625166261662716628166291663016631166321663316634166351663616637166381663916640166411664216643166441664516646166471664816649166501665116652166531665416655166561665716658166591666016661166621666316664166651666616667166681666916670166711667216673166741667516676166771667816679166801668116682166831668416685166861668716688166891669016691166921669316694166951669616697166981669916700167011670216703167041670516706167071670816709167101671116712167131671416715167161671716718167191672016721167221672316724167251672616727167281672916730167311673216733167341673516736167371673816739167401674116742167431674416745167461674716748167491675016751167521675316754167551675616757167581675916760167611676216763167641676516766167671676816769167701677116772167731677416775167761677716778167791678016781167821678316784167851678616787167881678916790167911679216793167941679516796167971679816799168001680116802168031680416805168061680716808168091681016811168121681316814168151681616817168181681916820168211682216823168241682516826168271682816829168301683116832168331683416835168361683716838168391684016841168421684316844168451684616847168481684916850168511685216853168541685516856168571685816859168601686116862168631686416865168661686716868168691687016871168721687316874168751687616877168781687916880168811688216883168841688516886168871688816889168901689116892168931689416895168961689716898168991690016901169021690316904169051690616907169081690916910169111691216913169141691516916169171691816919169201692116922169231692416925169261692716928169291693016931169321693316934169351693616937169381693916940169411694216943169441694516946169471694816949169501695116952169531695416955169561695716958169591696016961169621696316964169651696616967169681696916970169711697216973169741697516976169771697816979169801698116982169831698416985169861698716988169891699016991169921699316994169951699616997169981699917000170011700217003170041700517006170071700817009170101701117012170131701417015170161701717018170191702
01702117022170231702417025170261702717028170291703017031170321703317034170351703617037170381703917040170411704217043170441704517046170471704817049170501705117052170531705417055170561705717058170591706017061170621706317064170651706617067170681706917070170711707217073170741707517076170771707817079170801708117082170831708417085170861708717088170891709017091170921709317094170951709617097170981709917100171011710217103171041710517106171071710817109171101711117112171131711417115171161711717118171191712017121171221712317124171251712617127171281712917130171311713217133171341713517136171371713817139171401714117142171431714417145171461714717148171491715017151171521715317154171551715617157171581715917160171611716217163171641716517166171671716817169171701717117172171731717417175171761717717178171791718017181171821718317184171851718617187171881718917190171911719217193171941719517196171971719817199172001720117202172031720417205172061720717208172091721017211172121721317214172151721617217172181721917220172211722217223172241722517226172271722817229172301723117232172331723417235172361723717238172391724017241172421724317244172451724617247172481724917250172511725217253172541725517256172571725817259172601726117262172631726417265172661726717268172691727017271172721727317274172751727617277172781727917280172811728217283172841728517286172871728817289172901729117292172931729417295172961729717298172991730017301173021730317304173051730617307173081730917310173111731217313173141731517316173171731817319173201732117322173231732417325173261732717328173291733017331173321733317334173351733617337173381733917340173411734217343173441734517346173471734817349173501735117352173531735417355173561735717358173591736017361173621736317364173651736617367173681736917370173711737217373173741737517376173771737817379173801738117382173831738417385173861738717388173891739017391173921739317394173951739617397173981739917400174011740217403174041740517406174071740817409174101741117412174131741417415174161741717418174191742
01742117422174231742417425174261742717428174291743017431174321743317434174351743617437174381743917440174411744217443174441744517446174471744817449174501745117452174531745417455174561745717458174591746017461174621746317464174651746617467174681746917470174711747217473174741747517476174771747817479174801748117482174831748417485174861748717488174891749017491174921749317494174951749617497174981749917500175011750217503175041750517506175071750817509175101751117512175131751417515175161751717518175191752017521175221752317524175251752617527175281752917530175311753217533175341753517536175371753817539175401754117542175431754417545175461754717548175491755017551175521755317554175551755617557175581755917560175611756217563175641756517566175671756817569175701757117572175731757417575175761757717578175791758017581175821758317584175851758617587175881758917590175911759217593175941759517596175971759817599176001760117602176031760417605176061760717608176091761017611176121761317614176151761617617176181761917620176211762217623176241762517626176271762817629176301763117632176331763417635176361763717638176391764017641176421764317644176451764617647176481764917650176511765217653176541765517656176571765817659176601766117662176631766417665176661766717668176691767017671176721767317674176751767617677176781767917680176811768217683176841768517686176871768817689176901769117692176931769417695176961769717698176991770017701177021770317704177051770617707177081770917710177111771217713177141771517716177171771817719177201772117722177231772417725177261772717728177291773017731177321773317734177351773617737177381773917740177411774217743177441774517746177471774817749177501775117752177531775417755177561775717758177591776017761177621776317764177651776617767177681776917770177711777217773177741777517776177771777817779177801778117782177831778417785177861778717788177891779017791177921779317794177951779617797177981779917800178011780217803178041780517806178071780817809178101781117812178131781417815178161781717818178191782
01782117822178231782417825178261782717828178291783017831178321783317834178351783617837178381783917840178411784217843178441784517846178471784817849178501785117852178531785417855178561785717858178591786017861178621786317864178651786617867178681786917870178711787217873178741787517876178771787817879178801788117882178831788417885178861788717888178891789017891178921789317894178951789617897178981789917900179011790217903179041790517906179071790817909179101791117912179131791417915179161791717918179191792017921179221792317924179251792617927179281792917930179311793217933179341793517936179371793817939179401794117942179431794417945179461794717948179491795017951179521795317954179551795617957179581795917960179611796217963179641796517966179671796817969179701797117972179731797417975179761797717978179791798017981179821798317984179851798617987179881798917990179911799217993179941799517996179971799817999180001800118002180031800418005180061800718008180091801018011180121801318014180151801618017180181801918020180211802218023180241802518026180271802818029180301803118032180331803418035180361803718038180391804018041180421804318044180451804618047180481804918050180511805218053180541805518056180571805818059180601806118062180631806418065180661806718068180691807018071180721807318074180751807618077180781807918080180811808218083180841808518086180871808818089180901809118092180931809418095180961809718098180991810018101181021810318104181051810618107181081810918110181111811218113181141811518116181171811818119181201812118122181231812418125181261812718128181291813018131181321813318134181351813618137181381813918140181411814218143181441814518146181471814818149181501815118152181531815418155181561815718158181591816018161181621816318164181651816618167181681816918170181711817218173181741817518176181771817818179181801818118182181831818418185181861818718188181891819018191181921819318194181951819618197181981819918200182011820218203182041820518206182071820818209182101821118212182131821418215182161821718218182191822
01822118222182231822418225182261822718228182291823018231182321823318234182351823618237182381823918240182411824218243182441824518246182471824818249182501825118252182531825418255182561825718258182591826018261182621826318264182651826618267182681826918270182711827218273182741827518276182771827818279182801828118282182831828418285182861828718288182891829018291182921829318294182951829618297182981829918300183011830218303183041830518306183071830818309183101831118312183131831418315183161831718318183191832018321183221832318324183251832618327183281832918330183311833218333183341833518336183371833818339183401834118342183431834418345183461834718348183491835018351183521835318354183551835618357183581835918360183611836218363183641836518366183671836818369183701837118372183731837418375183761837718378183791838018381183821838318384183851838618387183881838918390183911839218393183941839518396183971839818399184001840118402184031840418405184061840718408184091841018411184121841318414184151841618417184181841918420184211842218423184241842518426184271842818429184301843118432184331843418435184361843718438184391844018441184421844318444184451844618447184481844918450184511845218453184541845518456184571845818459184601846118462184631846418465184661846718468184691847018471184721847318474184751847618477184781847918480184811848218483184841848518486184871848818489184901849118492184931849418495184961849718498184991850018501185021850318504185051850618507185081850918510185111851218513185141851518516185171851818519185201852118522185231852418525185261852718528185291853018531185321853318534185351853618537185381853918540185411854218543185441854518546185471854818549185501855118552185531855418555185561855718558185591856018561185621856318564185651856618567185681856918570185711857218573185741857518576185771857818579185801858118582185831858418585185861858718588185891859018591185921859318594185951859618597185981859918600186011860218603186041860518606186071860818609186101861118612186131861418615186161861718618186191862
01862118622186231862418625186261862718628186291863018631186321863318634186351863618637186381863918640186411864218643186441864518646186471864818649186501865118652186531865418655186561865718658186591866018661186621866318664186651866618667186681866918670186711867218673186741867518676186771867818679186801868118682186831868418685186861868718688186891869018691186921869318694186951869618697186981869918700187011870218703187041870518706187071870818709187101871118712187131871418715187161871718718187191872018721187221872318724187251872618727187281872918730187311873218733187341873518736187371873818739187401874118742187431874418745187461874718748187491875018751187521875318754187551875618757187581875918760187611876218763187641876518766187671876818769187701877118772187731877418775187761877718778187791878018781187821878318784187851878618787187881878918790187911879218793187941879518796187971879818799188001880118802188031880418805188061880718808188091881018811188121881318814188151881618817188181881918820188211882218823188241882518826188271882818829188301883118832188331883418835188361883718838188391884018841188421884318844188451884618847188481884918850188511885218853188541885518856188571885818859188601886118862188631886418865188661886718868188691887018871188721887318874188751887618877188781887918880188811888218883188841888518886188871888818889188901889118892188931889418895188961889718898188991890018901189021890318904189051890618907189081890918910189111891218913189141891518916189171891818919189201892118922189231892418925189261892718928189291893018931189321893318934189351893618937189381893918940189411894218943189441894518946189471894818949189501895118952189531895418955189561895718958189591896018961189621896318964189651896618967189681896918970189711897218973189741897518976189771897818979189801898118982189831898418985189861898718988189891899018991189921899318994189951899618997189981899919000190011900219003190041900519006190071900819009190101901119012190131901419015190161901719018190191902
01902119022190231902419025190261902719028190291903019031190321903319034190351903619037190381903919040190411904219043190441904519046190471904819049190501905119052190531905419055190561905719058190591906019061190621906319064190651906619067190681906919070190711907219073190741907519076190771907819079190801908119082190831908419085190861908719088190891909019091190921909319094190951909619097190981909919100191011910219103191041910519106191071910819109191101911119112191131911419115191161911719118191191912019121191221912319124191251912619127191281912919130191311913219133191341913519136191371913819139191401914119142191431914419145191461914719148191491915019151191521915319154191551915619157191581915919160191611916219163191641916519166191671916819169191701917119172191731917419175191761917719178191791918019181191821918319184191851918619187191881918919190191911919219193191941919519196191971919819199192001920119202192031920419205192061920719208192091921019211192121921319214192151921619217192181921919220192211922219223192241922519226192271922819229192301923119232192331923419235192361923719238192391924019241192421924319244192451924619247192481924919250192511925219253192541925519256192571925819259192601926119262192631926419265192661926719268192691927019271192721927319274192751927619277192781927919280192811928219283192841928519286192871928819289192901929119292192931929419295192961929719298192991930019301193021930319304193051930619307193081930919310193111931219313193141931519316193171931819319193201932119322193231932419325193261932719328193291933019331193321933319334193351933619337193381933919340193411934219343193441934519346193471934819349193501935119352193531935419355193561935719358193591936019361193621936319364193651936619367193681936919370193711937219373193741937519376193771937819379193801938119382193831938419385193861938719388193891939019391193921939319394193951939619397193981939919400194011940219403194041940519406194071940819409194101941119412194131941419415194161941719418194191942
01942119422194231942419425194261942719428194291943019431194321943319434194351943619437194381943919440194411944219443194441944519446194471944819449194501945119452194531945419455194561945719458194591946019461194621946319464194651946619467194681946919470194711947219473194741947519476194771947819479194801948119482194831948419485194861948719488194891949019491194921949319494194951949619497194981949919500195011950219503195041950519506195071950819509195101951119512195131951419515195161951719518195191952019521195221952319524195251952619527195281952919530195311953219533195341953519536195371953819539195401954119542195431954419545195461954719548195491955019551195521955319554195551955619557195581955919560195611956219563195641956519566195671956819569195701957119572195731957419575195761957719578195791958019581195821958319584195851958619587195881958919590195911959219593195941959519596195971959819599196001960119602196031960419605196061960719608196091961019611196121961319614196151961619617196181961919620196211962219623196241962519626196271962819629196301963119632196331963419635196361963719638196391964019641196421964319644196451964619647196481964919650196511965219653196541965519656196571965819659196601966119662196631966419665196661966719668196691967019671196721967319674196751967619677196781967919680196811968219683196841968519686196871968819689196901969119692196931969419695196961969719698196991970019701197021970319704197051970619707197081970919710197111971219713197141971519716197171971819719197201972119722197231972419725197261972719728197291973019731197321973319734197351973619737197381973919740197411974219743197441974519746197471974819749197501975119752197531975419755197561975719758197591976019761197621976319764197651976619767197681976919770197711977219773197741977519776197771977819779197801978119782197831978419785197861978719788197891979019791197921979319794197951979619797197981979919800198011980219803198041980519806198071980819809198101981119812198131981419815198161981719818198191982
01982119822198231982419825198261982719828198291983019831198321983319834198351983619837198381983919840198411984219843198441984519846198471984819849198501985119852198531985419855198561985719858198591986019861198621986319864198651986619867198681986919870198711987219873198741987519876198771987819879198801988119882198831988419885198861988719888198891989019891198921989319894198951989619897198981989919900199011990219903199041990519906199071990819909199101991119912199131991419915199161991719918199191992019921199221992319924199251992619927199281992919930199311993219933199341993519936199371993819939199401994119942199431994419945199461994719948199491995019951199521995319954199551995619957199581995919960199611996219963199641996519966199671996819969199701997119972199731997419975199761997719978199791998019981199821998319984199851998619987199881998919990199911999219993199941999519996199971999819999200002000120002200032000420005200062000720008200092001020011200122001320014200152001620017200182001920020200212002220023200242002520026200272002820029200302003120032200332003420035200362003720038200392004020041200422004320044200452004620047200482004920050200512005220053200542005520056200572005820059200602006120062200632006420065200662006720068200692007020071200722007320074200752007620077200782007920080200812008220083200842008520086200872008820089200902009120092200932009420095200962009720098200992010020101201022010320104201052010620107201082010920110201112011220113201142011520116201172011820119201202012120122201232012420125201262012720128201292013020131201322013320134201352013620137201382013920140201412014220143201442014520146201472014820149201502015120152201532015420155201562015720158201592016020161201622016320164201652016620167201682016920170201712017220173201742017520176201772017820179201802018120182201832018420185201862018720188201892019020191201922019320194201952019620197201982019920200202012020220203202042020520206202072020820209202102021120212202132021420215202162021720218202192022
02022120222202232022420225202262022720228202292023020231202322023320234202352023620237202382023920240202412024220243202442024520246202472024820249202502025120252202532025420255202562025720258202592026020261202622026320264202652026620267202682026920270202712027220273202742027520276202772027820279202802028120282202832028420285202862028720288202892029020291202922029320294202952029620297202982029920300203012030220303203042030520306203072030820309203102031120312203132031420315203162031720318203192032020321203222032320324203252032620327203282032920330203312033220333203342033520336203372033820339203402034120342203432034420345203462034720348203492035020351203522035320354203552035620357203582035920360203612036220363203642036520366203672036820369203702037120372203732037420375203762037720378203792038020381203822038320384203852038620387203882038920390203912039220393203942039520396203972039820399204002040120402204032040420405204062040720408204092041020411204122041320414204152041620417204182041920420204212042220423204242042520426204272042820429204302043120432204332043420435204362043720438204392044020441204422044320444204452044620447204482044920450204512045220453204542045520456204572045820459204602046120462204632046420465204662046720468204692047020471204722047320474204752047620477204782047920480204812048220483204842048520486204872048820489204902049120492204932049420495204962049720498204992050020501205022050320504205052050620507205082050920510205112051220513205142051520516205172051820519205202052120522205232052420525205262052720528205292053020531205322053320534205352053620537205382053920540205412054220543205442054520546205472054820549205502055120552205532055420555205562055720558205592056020561205622056320564205652056620567205682056920570205712057220573205742057520576205772057820579205802058120582205832058420585205862058720588205892059020591205922059320594205952059620597205982059920600206012060220603206042060520606206072060820609206102061120612206132061420615206162061720618206192062
02062120622206232062420625206262062720628206292063020631206322063320634206352063620637206382063920640206412064220643206442064520646206472064820649206502065120652206532065420655206562065720658206592066020661206622066320664206652066620667206682066920670206712067220673206742067520676206772067820679206802068120682206832068420685206862068720688206892069020691206922069320694206952069620697206982069920700207012070220703207042070520706207072070820709207102071120712207132071420715207162071720718207192072020721207222072320724207252072620727207282072920730207312073220733207342073520736207372073820739207402074120742207432074420745207462074720748207492075020751207522075320754207552075620757207582075920760207612076220763207642076520766207672076820769207702077120772207732077420775207762077720778207792078020781207822078320784207852078620787207882078920790207912079220793207942079520796207972079820799208002080120802208032080420805208062080720808208092081020811208122081320814208152081620817208182081920820208212082220823208242082520826208272082820829208302083120832208332083420835208362083720838208392084020841208422084320844208452084620847208482084920850208512085220853208542085520856208572085820859208602086120862208632086420865208662086720868208692087020871208722087320874208752087620877208782087920880208812088220883208842088520886208872088820889208902089120892208932089420895208962089720898208992090020901209022090320904209052090620907209082090920910209112091220913209142091520916209172091820919209202092120922209232092420925209262092720928209292093020931209322093320934209352093620937209382093920940209412094220943209442094520946209472094820949209502095120952209532095420955209562095720958209592096020961209622096320964209652096620967209682096920970209712097220973209742097520976209772097820979209802098120982209832098420985209862098720988209892099020991209922099320994209952099620997209982099921000210012100221003210042100521006210072100821009210102101121012210132101421015210162101721018210192102
02102121022210232102421025210262102721028210292103021031210322103321034210352103621037210382103921040210412104221043210442104521046210472104821049210502105121052210532105421055210562105721058210592106021061210622106321064210652106621067210682106921070210712107221073210742107521076210772107821079210802108121082210832108421085210862108721088210892109021091210922109321094210952109621097210982109921100211012110221103211042110521106211072110821109211102111121112211132111421115211162111721118211192112021121211222112321124211252112621127211282112921130211312113221133211342113521136211372113821139211402114121142211432114421145211462114721148211492115021151211522115321154211552115621157211582115921160211612116221163211642116521166211672116821169211702117121172211732117421175211762117721178211792118021181211822118321184211852118621187211882118921190211912119221193211942119521196211972119821199212002120121202212032120421205212062120721208212092121021211212122121321214212152121621217212182121921220212212122221223212242122521226212272122821229212302123121232212332123421235212362123721238212392124021241212422124321244212452124621247212482124921250212512125221253212542125521256212572125821259212602126121262212632126421265212662126721268212692127021271212722127321274212752127621277212782127921280212812128221283212842128521286212872128821289212902129121292212932129421295212962129721298212992130021301213022130321304213052130621307213082130921310213112131221313213142131521316213172131821319213202132121322213232132421325213262132721328213292133021331213322133321334213352133621337213382133921340213412134221343213442134521346213472134821349213502135121352213532135421355213562135721358213592136021361213622136321364213652136621367213682136921370213712137221373213742137521376213772137821379213802138121382213832138421385213862138721388213892139021391213922139321394213952139621397213982139921400214012140221403214042140521406214072140821409214102141121412214132141421415214162141721418214192142
02142121422214232142421425214262142721428214292143021431214322143321434214352143621437214382143921440214412144221443214442144521446214472144821449214502145121452214532145421455214562145721458214592146021461214622146321464214652146621467214682146921470214712147221473214742147521476214772147821479214802148121482214832148421485214862148721488214892149021491214922149321494214952149621497214982149921500215012150221503215042150521506215072150821509215102151121512215132151421515215162151721518215192152021521215222152321524215252152621527215282152921530215312153221533215342153521536215372153821539215402154121542215432154421545215462154721548215492155021551215522155321554215552155621557215582155921560215612156221563215642156521566215672156821569215702157121572215732157421575215762157721578215792158021581215822158321584215852158621587215882158921590215912159221593215942159521596215972159821599216002160121602216032160421605216062160721608216092161021611216122161321614216152161621617216182161921620216212162221623216242162521626216272162821629216302163121632216332163421635216362163721638216392164021641216422164321644216452164621647216482164921650216512165221653216542165521656216572165821659216602166121662216632166421665216662166721668216692167021671216722167321674216752167621677216782167921680216812168221683216842168521686216872168821689216902169121692216932169421695216962169721698216992170021701217022170321704217052170621707217082170921710217112171221713217142171521716217172171821719217202172121722217232172421725217262172721728217292173021731217322173321734217352173621737217382173921740217412174221743217442174521746217472174821749217502175121752217532175421755217562175721758217592176021761217622176321764217652176621767217682176921770217712177221773217742177521776217772177821779217802178121782217832178421785217862178721788217892179021791217922179321794217952179621797217982179921800218012180221803218042180521806218072180821809218102181121812218132181421815218162181721818218192182
02182121822218232182421825218262182721828218292183021831218322183321834218352183621837218382183921840218412184221843218442184521846218472184821849218502185121852218532185421855218562185721858218592186021861218622186321864218652186621867218682186921870218712187221873218742187521876218772187821879218802188121882218832188421885218862188721888218892189021891218922189321894218952189621897218982189921900219012190221903219042190521906219072190821909219102191121912219132191421915219162191721918219192192021921219222192321924219252192621927219282192921930219312193221933219342193521936219372193821939219402194121942219432194421945219462194721948219492195021951219522195321954219552195621957219582195921960219612196221963219642196521966219672196821969219702197121972219732197421975219762197721978219792198021981219822198321984219852198621987219882198921990219912199221993219942199521996219972199821999220002200122002220032200422005220062200722008220092201022011220122201322014220152201622017220182201922020220212202222023220242202522026220272202822029220302203122032220332203422035220362203722038220392204022041220422204322044220452204622047220482204922050220512205222053220542205522056220572205822059220602206122062220632206422065220662206722068220692207022071220722207322074220752207622077220782207922080220812208222083220842208522086220872208822089220902209122092220932209422095220962209722098220992210022101221022210322104221052210622107221082210922110221112211222113221142211522116221172211822119221202212122122221232212422125221262212722128221292213022131221322213322134221352213622137221382213922140221412214222143221442214522146221472214822149221502215122152221532215422155221562215722158221592216022161221622216322164221652216622167221682216922170221712217222173221742217522176221772217822179221802218122182221832218422185221862218722188221892219022191221922219322194221952219622197221982219922200222012220222203222042220522206222072220822209222102221122212222132221422215222162221722218222192222
02222122222222232222422225222262222722228222292223022231222322223322234222352223622237222382223922240222412224222243222442224522246222472224822249222502225122252222532225422255222562225722258222592226022261222622226322264222652226622267222682226922270222712227222273222742227522276222772227822279222802228122282222832228422285222862228722288222892229022291222922229322294222952229622297222982229922300223012230222303223042230522306223072230822309223102231122312223132231422315223162231722318223192232022321223222232322324223252232622327223282232922330223312233222333223342233522336223372233822339223402234122342223432234422345223462234722348223492235022351223522235322354223552235622357223582235922360223612236222363223642236522366223672236822369223702237122372223732237422375223762237722378223792238022381223822238322384223852238622387223882238922390223912239222393223942239522396223972239822399224002240122402224032240422405224062240722408224092241022411224122241322414224152241622417224182241922420224212242222423224242242522426224272242822429224302243122432224332243422435224362243722438224392244022441224422244322444224452244622447224482244922450224512245222453224542245522456224572245822459224602246122462224632246422465224662246722468224692247022471224722247322474224752247622477224782247922480224812248222483224842248522486224872248822489224902249122492224932249422495224962249722498224992250022501225022250322504225052250622507225082250922510225112251222513225142251522516225172251822519225202252122522225232252422525225262252722528225292253022531225322253322534225352253622537225382253922540225412254222543225442254522546225472254822549225502255122552225532255422555225562255722558225592256022561225622256322564225652256622567225682256922570225712257222573225742257522576225772257822579225802258122582225832258422585225862258722588225892259022591225922259322594225952259622597225982259922600226012260222603226042260522606226072260822609226102261122612226132261422615226162261722618226192262
02262122622226232262422625226262262722628226292263022631226322263322634226352263622637226382263922640226412264222643226442264522646226472264822649226502265122652226532265422655226562265722658226592266022661226622266322664226652266622667226682266922670226712267222673226742267522676226772267822679226802268122682226832268422685226862268722688226892269022691226922269322694226952269622697226982269922700227012270222703227042270522706227072270822709227102271122712227132271422715227162271722718227192272022721227222272322724227252272622727227282272922730227312273222733227342273522736227372273822739227402274122742227432274422745227462274722748227492275022751227522275322754227552275622757227582275922760227612276222763227642276522766227672276822769227702277122772227732277422775227762277722778227792278022781227822278322784227852278622787227882278922790227912279222793227942279522796227972279822799228002280122802228032280422805228062280722808228092281022811228122281322814228152281622817228182281922820228212282222823228242282522826228272282822829228302283122832228332283422835228362283722838228392284022841228422284322844228452284622847228482284922850228512285222853228542285522856228572285822859228602286122862228632286422865228662286722868228692287022871228722287322874228752287622877228782287922880228812288222883228842288522886228872288822889228902289122892228932289422895228962289722898228992290022901229022290322904229052290622907229082290922910229112291222913229142291522916229172291822919229202292122922229232292422925229262292722928229292293022931229322293322934229352293622937229382293922940229412294222943229442294522946229472294822949229502295122952229532295422955229562295722958229592296022961229622296322964229652296622967229682296922970229712297222973229742297522976229772297822979229802298122982229832298422985229862298722988229892299022991229922299322994229952299622997229982299923000230012300223003230042300523006230072300823009230102301123012230132301423015230162301723018230192302
02302123022230232302423025230262302723028230292303023031230322303323034230352303623037230382303923040230412304223043230442304523046230472304823049230502305123052230532305423055230562305723058230592306023061230622306323064230652306623067230682306923070230712307223073230742307523076230772307823079230802308123082230832308423085230862308723088230892309023091230922309323094230952309623097230982309923100231012310223103231042310523106231072310823109231102311123112231132311423115231162311723118231192312023121231222312323124231252312623127231282312923130231312313223133231342313523136231372313823139231402314123142231432314423145231462314723148231492315023151231522315323154231552315623157231582315923160231612316223163231642316523166231672316823169231702317123172231732317423175231762317723178231792318023181231822318323184231852318623187231882318923190231912319223193231942319523196231972319823199232002320123202232032320423205232062320723208232092321023211232122321323214232152321623217232182321923220232212322223223232242322523226232272322823229232302323123232232332323423235232362323723238232392324023241232422324323244232452324623247232482324923250232512325223253232542325523256232572325823259232602326123262232632326423265232662326723268232692327023271232722327323274232752327623277232782327923280232812328223283232842328523286232872328823289232902329123292232932329423295232962329723298232992330023301233022330323304233052330623307233082330923310233112331223313233142331523316233172331823319233202332123322233232332423325233262332723328233292333023331233322333323334233352333623337233382333923340233412334223343233442334523346233472334823349233502335123352233532335423355233562335723358233592336023361233622336323364233652336623367233682336923370233712337223373233742337523376233772337823379233802338123382233832338423385233862338723388233892339023391233922339323394233952339623397233982339923400234012340223403234042340523406234072340823409234102341123412234132341423415234162341723418234192342
02342123422234232342423425234262342723428234292343023431234322343323434234352343623437234382343923440234412344223443234442344523446234472344823449234502345123452234532345423455234562345723458234592346023461234622346323464234652346623467234682346923470234712347223473234742347523476234772347823479234802348123482234832348423485234862348723488234892349023491234922349323494234952349623497234982349923500235012350223503235042350523506235072350823509235102351123512235132351423515235162351723518235192352023521235222352323524235252352623527235282352923530235312353223533235342353523536235372353823539235402354123542235432354423545235462354723548235492355023551235522355323554235552355623557235582355923560235612356223563235642356523566235672356823569235702357123572235732357423575235762357723578235792358023581235822358323584235852358623587235882358923590235912359223593235942359523596235972359823599236002360123602236032360423605236062360723608236092361023611236122361323614236152361623617236182361923620236212362223623236242362523626236272362823629236302363123632236332363423635236362363723638236392364023641236422364323644236452364623647236482364923650236512365223653236542365523656236572365823659236602366123662236632366423665236662366723668236692367023671236722367323674236752367623677236782367923680236812368223683236842368523686236872368823689236902369123692236932369423695236962369723698236992370023701237022370323704237052370623707237082370923710237112371223713237142371523716237172371823719237202372123722237232372423725237262372723728237292373023731237322373323734237352373623737237382373923740237412374223743237442374523746237472374823749237502375123752237532375423755237562375723758237592376023761237622376323764237652376623767237682376923770237712377223773237742377523776237772377823779237802378123782237832378423785237862378723788237892379023791237922379323794237952379623797237982379923800238012380223803238042380523806238072380823809238102381123812238132381423815238162381723818238192382
02382123822238232382423825238262382723828238292383023831238322383323834238352383623837238382383923840238412384223843238442384523846238472384823849238502385123852238532385423855238562385723858238592386023861238622386323864238652386623867238682386923870238712387223873238742387523876238772387823879238802388123882238832388423885238862388723888238892389023891238922389323894238952389623897238982389923900239012390223903239042390523906239072390823909239102391123912239132391423915239162391723918239192392023921239222392323924239252392623927239282392923930239312393223933239342393523936239372393823939239402394123942239432394423945239462394723948239492395023951239522395323954239552395623957239582395923960239612396223963239642396523966239672396823969239702397123972239732397423975239762397723978239792398023981239822398323984239852398623987239882398923990239912399223993239942399523996239972399823999240002400124002240032400424005240062400724008240092401024011240122401324014240152401624017240182401924020240212402224023240242402524026240272402824029240302403124032240332403424035240362403724038240392404024041240422404324044240452404624047240482404924050240512405224053240542405524056240572405824059240602406124062240632406424065240662406724068240692407024071240722407324074240752407624077240782407924080240812408224083240842408524086240872408824089240902409124092240932409424095240962409724098240992410024101241022410324104241052410624107241082410924110241112411224113241142411524116241172411824119241202412124122241232412424125241262412724128241292413024131241322413324134241352413624137241382413924140241412414224143241442414524146241472414824149241502415124152241532415424155241562415724158241592416024161241622416324164241652416624167241682416924170241712417224173241742417524176241772417824179241802418124182241832418424185241862418724188241892419024191241922419324194241952419624197241982419924200242012420224203242042420524206242072420824209242102421124212242132421424215242162421724218242192422
02422124222242232422424225242262422724228242292423024231242322423324234242352423624237242382423924240242412424224243242442424524246242472424824249242502425124252242532425424255242562425724258242592426024261242622426324264242652426624267242682426924270242712427224273242742427524276242772427824279242802428124282242832428424285242862428724288242892429024291242922429324294242952429624297242982429924300243012430224303243042430524306243072430824309243102431124312243132431424315243162431724318243192432024321243222432324324243252432624327243282432924330243312433224333243342433524336243372433824339243402434124342243432434424345243462434724348243492435024351243522435324354243552435624357243582435924360243612436224363243642436524366243672436824369243702437124372243732437424375243762437724378243792438024381243822438324384243852438624387243882438924390243912439224393243942439524396243972439824399244002440124402244032440424405244062440724408244092441024411244122441324414244152441624417244182441924420244212442224423244242442524426244272442824429244302443124432244332443424435244362443724438244392444024441244422444324444244452444624447244482444924450244512445224453244542445524456244572445824459244602446124462244632446424465244662446724468244692447024471244722447324474244752447624477244782447924480244812448224483244842448524486244872448824489244902449124492244932449424495244962449724498244992450024501245022450324504245052450624507245082450924510245112451224513245142451524516245172451824519245202452124522245232452424525245262452724528245292453024531245322453324534245352453624537245382453924540245412454224543245442454524546245472454824549245502455124552245532455424555245562455724558245592456024561245622456324564245652456624567245682456924570245712457224573245742457524576245772457824579245802458124582245832458424585245862458724588245892459024591245922459324594245952459624597245982459924600246012460224603246042460524606246072460824609246102461124612246132461424615246162461724618246192462
0246212462224623246242462524626246272462824629246302463124632246332463424635246362463724638246392464024641246422464324644246452464624647246482464924650246512465224653246542465524656246572465824659246602466124662246632466424665246662466724668246692467024671246722467324674246752467624677246782467924680246812468224683246842468524686246872468824689246902469124692246932469424695246962469724698246992470024701247022470324704247052470624707247082470924710247112471224713247142471524716247172471824719247202472124722247232472424725247262472724728247292473024731247322473324734247352473624737247382473924740247412474224743247442474524746247472474824749247502475124752247532475424755247562475724758247592476024761247622476324764247652476624767247682476924770247712477224773247742477524776247772477824779247802478124782247832478424785247862478724788247892479024791247922479324794247952479624797247982479924800248012480224803248042480524806248072480824809248102481124812248132481424815248162481724818248192482024821248222482324824248252482624827248282482924830248312483224833248342483524836248372483824839248402484124842248432484424845248462484724848248492485024851248522485324854248552485624857248582485924860248612486224863248642486524866248672486824869248702487124872248732487424875248762487724878248792488024881248822488324884248852488624887248882488924890248912489224893248942489524896248972489824899249002490124902249032490424905249062490724908249092491024911249122491324914249152491624917249182491924920249212492224923249242492524926249272492824929249302493124932249332493424935249362493724938
							/*
 * Copyright (c) 2019 Nuclei Limited. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef __CORE_FEATURE_DSP__
#define __CORE_FEATURE_DSP__

/*!
 * @file     core_feature_dsp.h
 * @brief    DSP feature API header file for Nuclei N/NX Core
 */
/*
 * DSP Feature Configuration Macro:
 * 1. __DSP_PRESENT:  Define whether Digital Signal Processing Unit(DSP) is present or not
 *   * 0: Not present
 *   * 1: Present
 */
#ifdef __cplusplus
 extern "C" {
#endif

#include "core_feature_base.h"

#if defined(__DSP_PRESENT) && (__DSP_PRESENT == 1)

/*
 * Optional inclusion of <rvp_intrinsic.h>.
 * Nothing is included unless __INC_INTRINSIC_API is defined and equal to 1.
 * When enabled, the header is included if __zcc__ is defined; otherwise it is
 * included only when neither __ICCRISCV__ nor __llvm__ is defined.
 * NOTE(review): presumably <rvp_intrinsic.h> is shipped by the toolchain and is
 * unavailable for the excluded compilers - confirm against toolchain docs.
 */
#if defined(__INC_INTRINSIC_API) && (__INC_INTRINSIC_API == 1)
#if defined(__zcc__)
/* __zcc__ is checked first, so the __llvm__ exclusion below does not apply to it. */
#include <rvp_intrinsic.h>
#else
#if !defined(__ICCRISCV__) && !defined(__llvm__)
#include <rvp_intrinsic.h>
#endif /* !__ICCRISCV__ && !__llvm__ */
#endif /* __zcc__ */
#endif /* __INC_INTRINSIC_API == 1 */

#ifndef __ICCRISCV__
/* ###########################  CPU SIMD DSP Intrinsic Functions ########################### */
/**
 * \defgroup NMSIS_Core_DSP_Intrinsic   Intrinsic Functions for SIMD Instructions
 * \ingroup  NMSIS_Core
 * \brief    Functions that generate RISC-V DSP SIMD instructions.
 * \details
 *
 * The following functions generate specified RISC-V SIMD instructions that cannot be directly accessed by compiler.
 * * **DSP ISA Extension Instruction Summary**
 *   + **Shorthand Definitions**
 *     - r.H == r.H1: r[31:16], r.L == r.H0: r[15:0]
 *     - r.B3: r[31:24], r.B2: r[23:16], r.B1: r[15:8], r.B0: r[7:0]
 *     - r.B[x]: r[(x*8+7):(x*8+0)]
 *     - r.H[x]: r[(x*16+15):(x*16+0)]
 *     - r.W[x]: r[(x*32+31):(x*32+0)]
 *     - r[xU]: the upper 32-bit of a 64-bit number; xU represents the GPR number that contains this upper part 32-bit value.
 *     - r[xL]: the lower 32-bit of a 64-bit number; xL represents the GPR number that contains this lower part 32-bit value.
 *     - r[xU].r[xL]: a 64-bit number that is formed from a pair of GPRs.
 *     - s>>: signed arithmetic right shift
 *     - u>>: unsigned logical right shift
 *     - SAT.Qn(): Saturate to the range of [-2^n, 2^n-1], if saturation happens, set PSW.OV.
 *     - SAT.Um(): Saturate to the range of [0, 2^m-1], if saturation happens, set PSW.OV.
 *     - RUND(): Indicate `rounding`, i.e., add 1 to the most significant discarded bit for right shift or MSW-type multiplication instructions.
 *     - Sign or Zero Extending functions:
 *       - SEm(data): Sign-Extend data to m-bit.
 *       - ZEm(data): Zero-Extend data to m-bit.
 *     - ABS(x): Calculate the absolute value of `x`.
 *     - CONCAT(x,y): Concatenate `x` and `y` to form a value.
 *     - u<: Unsigned less than comparison.
 *     - u<=: Unsigned less than or equal comparison.
 *     - u>: Unsigned greater than comparison.
 *     - s*: Signed multiplication.
 *     - u*: Unsigned multiplication.
 *
 *   @{
 */
/** @} */ /* End of Doxygen Group NMSIS_Core_DSP_Intrinsic */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS      SIMD Data Processing Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic
 * \brief    SIMD Data Processing Instructions
 * \details
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB      SIMD 16-bit Add/Subtract Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
 * \brief    SIMD 16-bit Add/Subtract Instructions
 * \details
 * Based on the combination of the types of the two 16-bit arithmetic operations, the SIMD 16-bit
 * add/subtract instructions can be classified into 6 main categories: Addition (two 16-bit additions),
 * Subtraction (two 16-bit subtractions), Crossed Add & Sub (one addition and one subtraction),
 * Crossed Sub & Add (one subtraction and one addition), Straight Add & Sub (one addition and one
 * subtraction), and Straight Sub & Add (one subtraction and one addition).
 * Based on the way of how an overflow condition is handled, the SIMD 16-bit add/subtract
 * instructions can be classified into 5 groups: Wrap-around (dropping overflow), Signed Halving
 * (keeping overflow by dropping 1 LSB bit), Unsigned Halving, Signed Saturation (clipping overflow),
 * and Unsigned Saturation.
 * Together, there are 30 SIMD 16-bit add/subtract instructions.
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB      SIMD 8-bit Addition & Subtraction Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
 * \brief    SIMD 8-bit Addition & Subtraction Instructions
 * \details
 * Based on the types of the four 8-bit arithmetic operations, the SIMD 8-bit add/subtract instructions
 * can be classified into 2 main categories: Addition (four 8-bit addition), and Subtraction (four 8-bit
 * subtraction).
 * Based on the way of how an overflow condition is handled for signed or unsigned operation, the
 * SIMD 8-bit add/subtract instructions can be classified into 5 groups: Wrap-around (dropping
 * overflow), Signed Halving (keeping overflow by dropping 1 LSB bit), Unsigned Halving, Signed
 * Saturation (clipping overflow), and Unsigned Saturation.
 * Together, there are 10 SIMD 8-bit add/subtract instructions.
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT      SIMD 16-bit Shift Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
 * \brief    SIMD 16-bit Shift Instructions
 * \details
 * there are 14 SIMD 16-bit shift instructions.
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT      SIMD 8-bit Shift Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
 * \brief    SIMD 8-bit Shift Instructions
 * \details
 *  there are 14 SIMD 8-bit shift instructions.
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP      SIMD 16-bit Compare Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
 * \brief    SIMD 16-bit Compare Instructions
 * \details
 *  there are 5 SIMD 16-bit Compare instructions.
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP      SIMD 8-bit Compare Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
 * \brief    SIMD 8-bit Compare Instructions
 * \details
 *  there are 5  SIMD 8-bit Compare instructions.
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY      SIMD 16-bit Multiply Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
 * \brief    SIMD 16-bit Multiply Instructions
 * \details
 * there are 6 SIMD 16-bit Multiply instructions.
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY      SIMD 8-bit Multiply Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
 * \brief    SIMD 8-bit Multiply Instructions
 * \details
 *  there are 6 SIMD 8-bit Multiply instructions.
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC      SIMD 16-bit Miscellaneous Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
 * \brief    SIMD 16-bit Miscellaneous Instructions
 * \details
 *  there are 10 SIMD 16-bit Misc instructions.
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC      SIMD 8-bit Miscellaneous Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
 * \brief    SIMD 8-bit Miscellaneous Instructions
 * \details
 *  there are 10 SIMD 8-bit Miscellaneous instructions.
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK      SIMD 8-bit Unpacking Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
 * \brief    SIMD 8-bit Unpacking Instructions
 * \details
 *  there are 8 SIMD 8-bit Unpacking instructions.
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_NON_SIMD      Non-SIMD Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic
 * \brief    Non-SIMD Instructions
 * \details
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU      Non-SIMD Q15 saturation ALU Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NON_SIMD
 * \brief    Non-SIMD Q15 saturation ALU Instructions
 * \details
 * there are 7 Non-SIMD Q15 saturation ALU Instructions
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU      Non-SIMD Q31 saturation ALU Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NON_SIMD
 * \brief    Non-SIMD Q31 saturation ALU Instructions
 * \details
 *  This group contains the Non-SIMD Q31 saturation ALU Instructions.
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION      32-bit Computation Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NON_SIMD
 * \brief    32-bit Computation Instructions
 * \details
 * there are 8 32-bit Computation Instructions
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_OV_FLAG_SC      OV (Overflow) flag Set/Clear Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NON_SIMD
 * \brief    OV (Overflow) flag Set/Clear Instructions
 * \details
 * The following table lists the user instructions related to Overflow (OV) flag manipulation.
 * There are 2 OV (Overflow) flag Set/Clear Instructions.
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC      Non-SIMD Miscellaneous Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NON_SIMD
 * \brief    Non-SIMD Miscellaneous Instructions
 * \details
 * There are 13 Miscellaneous Instructions here.
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS      Partial-SIMD Data Processing Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic
 * \brief    Partial-SIMD Data Processing Instructions
 * \details
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_PACK      SIMD 16-bit Packing Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS
 * \brief    SIMD 16-bit Packing Instructions
 * \details
 * There are 4 SIMD 16-bit Packing Instructions.
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC      Signed MSW 32x32 Multiply and Add Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS
 * \brief    Signed MSW 32x32 Multiply and Add Instructions
 * \details
 *  there are 8 Signed MSW 32x32 Multiply and Add Instructions
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC      Signed MSW 32x16 Multiply and Add Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS
 * \brief    Signed MSW 32x16 Multiply and Add Instructions
 * \details
 * there are 15 Signed MSW 32x16 Multiply and Add Instructions
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB      Signed 16-bit Multiply 32-bit Add/Subtract Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS
 * \brief    Signed 16-bit Multiply 32-bit Add/Subtract Instructions
 * \details
 *  there are 18 Signed 16-bit Multiply 32-bit Add/Subtract Instructions
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB      Signed 16-bit Multiply 64-bit Add/Subtract Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS
 * \brief    Signed 16-bit Multiply 64-bit Add/Subtract Instructions
 * \details
 *  This group contains the Signed 16-bit Multiply 64-bit Add/Subtract Instructions.
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC      Partial-SIMD Miscellaneous Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS
 * \brief    Partial-SIMD Miscellaneous Instructions
 * \details
 *  there are  7 Partial-SIMD Miscellaneous Instructions
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_8B_MULT_32B_ADD      8-bit Multiply with 32-bit Add Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS
 * \brief    8-bit Multiply with 32-bit Add Instructions
 * \details
 * there are  3 8-bit Multiply with 32-bit Add Instructions
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_64B_PROFILE      64-bit Profile Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic
 * \brief    64-bit Profile Instructions
 * \details
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB      64-bit Addition & Subtraction Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_64B_PROFILE
 * \brief    64-bit Addition & Subtraction Instructions
 * \details
 * there are 10 64-bit Addition & Subtraction Instructions.
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB      32-bit Multiply with 64-bit Add/Subtract Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_64B_PROFILE
 * \brief    32-bit Multiply with 64-bit Add/Subtract Instructions
 * \details
 *  This group contains the 32-bit Multiply 64-bit Add/Subtract Instructions.
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB      Signed 16-bit Multiply with 64-bit Add/Subtract Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_64B_PROFILE
 * \brief    Signed 16-bit Multiply with 64-bit Add/Subtract Instructions
 * \details
 * there are 10 Signed 16-bit Multiply with 64-bit Add/Subtract Instructions
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY      RV64 Only Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic
 * \brief    RV64 Only Instructions
 * \details
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB      (RV64 Only) SIMD 32-bit Add/Subtract Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_RV64_ONLY
 * \brief    (RV64 Only) SIMD 32-bit Add/Subtract Instructions
 * \details
 * The following tables list instructions that are only present in RV64.
 * There are 30 SIMD 32-bit addition or subtraction instructions. There are 4 SIMD 16-bit Packing Instructions.
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT      (RV64 Only) SIMD 32-bit Shift Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_RV64_ONLY
 * \brief    (RV64 Only) SIMD 32-bit Shift Instructions
 * \details
 *  there are 14 (RV64 Only) SIMD 32-bit Shift Instructions
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC      (RV64 Only) SIMD 32-bit Miscellaneous Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_RV64_ONLY
 * \brief    (RV64 Only) SIMD 32-bit Miscellaneous Instructions
 * \details
 * there are 5  (RV64 Only) SIMD 32-bit Miscellaneous Instructions
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT      (RV64 Only) SIMD Q15 Saturating Multiply Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_RV64_ONLY
 * \brief    (RV64 Only) SIMD Q15 Saturating Multiply Instructions
 * \details
 *  there are 9 (RV64 Only) SIMD Q15 saturating Multiply Instructions
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT      (RV64 Only) 32-bit Multiply Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_RV64_ONLY
 * \brief    (RV64 Only) 32-bit Multiply Instructions
 * \details
 *  there are 3 (RV64 Only) 32-bit Multiply Instructions
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT_ADD      (RV64 Only) 32-bit Multiply & Add Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_RV64_ONLY
 * \brief    (RV64 Only) 32-bit Multiply & Add Instructions
 * \details
 *  there are  3 (RV64 Only) 32-bit Multiply & Add Instructions
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC      (RV64 Only) 32-bit Parallel Multiply & Add Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_RV64_ONLY
 * \brief    (RV64 Only) 32-bit Parallel Multiply & Add Instructions
 * \details
 * there are 12 (RV64 Only) 32-bit Parallel Multiply & Add Instructions
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_NON_SIMD_32B_SHIFT      (RV64 Only) Non-SIMD 32-bit Shift Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_RV64_ONLY
 * \brief    (RV64 Only) Non-SIMD 32-bit Shift Instructions
 * \details
 *  there is 1 (RV64 Only) Non-SIMD 32-bit Shift Instruction
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PACK      32-bit Packing Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_RV64_ONLY
 * \brief    32-bit Packing Instructions
 * \details
 *  There are four 32-bit packing instructions here
 */

/* ===== Inline Function Start for 3.1. ADD8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
 * \brief ADD8 (SIMD 8-bit Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * ADD8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit integer element additions simultaneously.
 *
 * **Description**:\n
 * This instruction adds the 8-bit integer elements in Rs1 with the 8-bit integer elements
 * in Rs2, and then writes the 8-bit element results to Rd.
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned addition.
 *
 * **Operations**:\n
 * ~~~
 * Rd.B[x] = Rs1.B[x] + Rs2.B[x];
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_ADD8(unsigned long a, unsigned long b)
{
    /* %0 is Rd (the packed 8-bit sums); a and b are bound to Rs1 and Rs2. */
    unsigned long sum;

    __ASM volatile("add8 %0, %1, %2"
                   : "=r"(sum)
                   : "r"(a), "r"(b));
    return sum;
}
/* ===== Inline Function End for 3.1. ADD8 ===== */

/* ===== Inline Function Start for 3.2. ADD16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief ADD16 (SIMD 16-bit Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * ADD16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit integer element additions simultaneously.
 *
 * **Description**:\n
 * This instruction adds the 16-bit integer elements in Rs1 with the 16-bit integer
 * elements in Rs2, and then writes the 16-bit element results to Rd.
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned addition.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = Rs1.H[x] + Rs2.H[x];
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_ADD16(unsigned long a, unsigned long b)
{
    /* %0 is Rd (the packed 16-bit sums); a and b are bound to Rs1 and Rs2. */
    unsigned long sum;

    __ASM volatile("add16 %0, %1, %2"
                   : "=r"(sum)
                   : "r"(a), "r"(b));
    return sum;
}
/* ===== Inline Function End for 3.2. ADD16 ===== */

/* ===== Inline Function Start for 3.3. ADD64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
 * \brief ADD64 (64-bit Addition)
 * \details
 * **Type**: 64-bit Profile
 *
 * **Syntax**:\n
 * ~~~
 * ADD64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Add two 64-bit signed or unsigned integers.
 *
 * **RV32 Description**:\n
 * This instruction adds the 64-bit integer of an even/odd pair of registers specified
 * by Rs1(4,1) with the 64-bit integer of an even/odd pair of registers specified by Rs2(4,1), and then
 * writes the 64-bit result to an even/odd pair of registers specified by Rd(4,1).
 * Rx(4,1), i.e., value d, determines the even/odd pair group of two registers. Specifically, the register
 * pair includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
 * of the pair contains the low 32-bit of the result.
 *
 * **RV64 Description**:\n
 * This instruction has the same behavior as the ADD instruction in RV64I.
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned addition.
 *
 * **Operations**:\n
 * ~~~
 * RV32:
 *  t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 *  a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
 *  b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
 *  R[t_H].R[t_L] = R[a_H].R[a_L] + R[b_H].R[b_L];
 * RV64:
 *  Rd = Rs1 + Rs2;
 * ~~~
 *
 * \param [in]  a    unsigned long long type of value stored in a
 * \param [in]  b    unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_ADD64(unsigned long long a, unsigned long long b)
{
    /* %0 is the 64-bit destination; a and b are the two 64-bit addends. */
    unsigned long long sum;

    __ASM volatile("add64 %0, %1, %2"
                   : "=r"(sum)
                   : "r"(a), "r"(b));
    return sum;
}
/* ===== Inline Function End for 3.3. ADD64 ===== */

/* ===== Inline Function Start for 3.4. AVE ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
 * \brief AVE (Average with Rounding)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * AVE Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Calculate the average of the contents of two general registers.
 *
 * **Description**:\n
 * This instruction calculates the average value of two signed integers stored in Rs1 and
 * Rs2, rounds up a half-integer result to the nearest integer, and writes the result to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Sum = CONCAT(Rs1[MSB],Rs1[MSB:0]) + CONCAT(Rs2[MSB],Rs2[MSB:0]) + 1;
 * Rd = Sum[(MSB+1):1];
 * for RV32: MSB=31,
 * for RV64: MSB=63
 * ~~~
 *
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_AVE(long a, long b)
{
    /* %0 is Rd (the rounded average); a and b are bound to Rs1 and Rs2. */
    long avg;

    __ASM volatile("ave %0, %1, %2"
                   : "=r"(avg)
                   : "r"(a), "r"(b));
    return avg;
}
/* ===== Inline Function End for 3.4. AVE ===== */

/* ===== Inline Function Start for 3.5. BITREV ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
 * \brief BITREV (Bit Reverse)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * BITREV Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Reverse the bit positions of the source operand within a specified width starting from bit
 * 0. The reversed width is a variable from a GPR.
 *
 * **Description**:\n
 * This instruction reverses the bit positions of the content of Rs1. The reversed bit width
 * is calculated as Rs2[4:0]+1 (RV32) or Rs2[5:0]+1 (RV64). The upper bits beyond the reversed width
 * are filled with zeros. After the bit reverse operation, the result is written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * msb = Rs2[4:0]; (for RV32)
 * msb = Rs2[5:0]; (for RV64)
 * rev[0:msb] = Rs1[msb:0];
 * Rd = ZE(rev[msb:0]);
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_BITREV(unsigned long a, unsigned long b)
{
    /* %0 is Rd; a is the value to reverse (Rs1), b the width selector (Rs2). */
    unsigned long reversed;

    __ASM volatile("bitrev %0, %1, %2"
                   : "=r"(reversed)
                   : "r"(a), "r"(b));
    return reversed;
}
/* ===== Inline Function End for 3.5. BITREV ===== */

/* ===== Inline Function Start for 3.6. BITREVI ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
 * \brief BITREVI (Bit Reverse Immediate)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * (RV32) BITREVI Rd, Rs1, imm[4:0]
 * (RV64) BITREVI Rd, Rs1, imm[5:0]
 * ~~~
 *
 * **Purpose**:\n
 * Reverse the bit positions of the source operand within a specified width starting from bit
 * 0. The reversed width is an immediate value.
 *
 * **Description**:\n
 * This instruction reverses the bit positions of the content of Rs1. The reversed bit width
 * is calculated as imm[4:0]+1 (RV32) or imm[5:0]+1 (RV64). The upper bits beyond the reversed width
 * are filled with zeros. After the bit reverse operation, the result is written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * msb = imm[4:0]; (RV32)
 * msb = imm[5:0]; (RV64)
 * rev[0:msb] = Rs1[msb:0];
 * Rd = ZE(rev[msb:0]);
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    immediate operand (imm[4:0] on RV32, imm[5:0] on RV64); must be a compile-time constant
 * \return value stored in unsigned long type
 */
/*
 * Statement-expression wrapper around the BITREVI instruction.
 *   a: source value; converted to unsigned long and passed as Rs1.
 *   b: immediate operand; it is bound with the "K" constraint, so it has to
 *      be a compile-time constant.
 * Evaluates to the unsigned long written to Rd.
 *
 * The argument `a` is evaluated before the output temporary is declared, and
 * the temporary uses a reserved-style name, so a call such as
 * __RV_BITREVI(result, 7) reads the caller's `result` rather than an
 * uninitialised macro-local of the same name.
 *
 * NOTE(review): GCC documents the RISC-V "K" constraint as a 5-bit unsigned
 * immediate, so RV64 immediates above 31 may be rejected -- confirm against
 * the toolchain in use.
 */
#define __RV_BITREVI(a, b)    \
    ({    \
        unsigned long __a = (unsigned long)(a);    \
        unsigned long __rd;    \
        __ASM volatile("bitrevi %0, %1, %2" : "=r"(__rd) : "r"(__a), "K"(b));    \
        __rd;    \
    })
/* ===== Inline Function End for 3.6. BITREVI ===== */

/* ===== Inline Function Start for 3.7. BPICK ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
 * \brief BPICK (Bit-wise Pick)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * BPICK Rd, Rs1, Rs2, Rc
 * ~~~
 *
 * **Purpose**:\n
 * Select from two source operands based on a bit mask in the third operand.
 *
 * **Description**:\n
 * This instruction selects individual bits from Rs1 or Rs2, based on the bit mask value in
 * Rc. If a bit in Rc is 1, the corresponding bit is from Rs1; otherwise, the corresponding bit is from Rs2.
 * The selection results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd[x] = Rc[x]? Rs1[x] : Rs2[x];
 * for RV32, x=31...0
 * for RV64, x=63...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \param [in]  c    unsigned long type of value stored in c
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_BPICK(unsigned long a, unsigned long b, unsigned long c)
{
    /* %0 is Rd; a, b and c are bound to Rs1, Rs2 and the bit mask Rc. */
    unsigned long picked;

    __ASM volatile("bpick %0, %1, %2, %3"
                   : "=r"(picked)
                   : "r"(a), "r"(b), "r"(c));
    return picked;
}
/* ===== Inline Function End for 3.7. BPICK ===== */

/* ===== Inline Function Start for 3.8. CLROV ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_OV_FLAG_SC
 * \brief CLROV (Clear OV flag)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * CLROV # pseudo mnemonic
 * ~~~
 *
 * **Purpose**:\n
 * This pseudo instruction is an alias to `CSRRCI x0, ucode, 1` instruction.
 *
 *
 */
__STATIC_FORCEINLINE void __RV_CLROV(void)
{
    /* Emit the operand-less CLROV pseudo-instruction; per the description
     * above it is an alias of `CSRRCI x0, ucode, 1`. Nothing is returned. */
    __ASM volatile("clrov ");
}
/* ===== Inline Function End for 3.8. CLROV ===== */

/* ===== Inline Function Start for 3.9. CLRS8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
 * \brief CLRS8 (SIMD 8-bit Count Leading Redundant Sign)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * CLRS8 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Count the number of redundant sign bits of the 8-bit elements of a general register.
 *
 * **Description**:\n
 * Starting from the bits next to the sign bits of the 8-bit elements of Rs1, this instruction
 * counts the number of redundant sign bits and writes the result to the corresponding 8-bit elements
 * of Rd.
 *
 * **Operations**:\n
 * ~~~
 * snum[x] = Rs1.B[x];
 * cnt[x] = 0;
 * for (i = 6 to 0) {
 *   if (snum[x](i) == snum[x](7)) {
 *     cnt[x] = cnt[x] + 1;
 *   } else {
 *     break;
 *   }
 * }
 * Rd.B[x] = cnt[x];
 * for RV32: x=3...0
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_CLRS8(unsigned long a)
{
    /* %0 is Rd (one redundant-sign count per byte); a is bound to Rs1. */
    unsigned long counts;

    __ASM volatile("clrs8 %0, %1"
                   : "=r"(counts)
                   : "r"(a));
    return counts;
}
/* ===== Inline Function End for 3.9. CLRS8 ===== */

/* ===== Inline Function Start for 3.10. CLRS16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
 * \brief CLRS16 (SIMD 16-bit Count Leading Redundant Sign)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * CLRS16 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Count the number of redundant sign bits of the 16-bit elements of a general register.
 *
 * **Description**:\n
 * Starting from the bits next to the sign bits of the 16-bit elements of Rs1, this
 * instruction counts the number of redundant sign bits and writes the result to the corresponding 16-
 * bit elements of Rd.
 *
 * **Operations**:\n
 * ~~~
 * snum[x] = Rs1.H[x];
 * cnt[x] = 0;
 * for (i = 14 to 0) {
 *   if (snum[x](i) == snum[x](15)) {
 *     cnt[x] = cnt[x] + 1;
 *   } else {
 *     break;
 *   }
 * }
 * Rd.H[x] = cnt[x];
 * for RV32: x=1...0
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_CLRS16(unsigned long a)
{
    /* %0 is Rd (one redundant-sign count per halfword); a is bound to Rs1. */
    unsigned long counts;

    __ASM volatile("clrs16 %0, %1"
                   : "=r"(counts)
                   : "r"(a));
    return counts;
}
/* ===== Inline Function End for 3.10. CLRS16 ===== */

/* ===== Inline Function Start for 3.11. CLRS32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
 * \brief CLRS32 (SIMD 32-bit Count Leading Redundant Sign)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * CLRS32 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Count the number of redundant sign bits of the 32-bit elements of a general register.
 *
 * **Description**:\n
 * Starting from the bits next to the sign bits of the 32-bit elements of Rs1, this
 * instruction counts the number of redundant sign bits and writes the result to the corresponding 32-
 * bit elements of Rd.
 *
 * **Operations**:\n
 * ~~~
 * snum[x] = Rs1.W[x];
 * cnt[x] = 0;
 * for (i = 30 to 0) {
 *   if (snum[x](i) == snum[x](31)) {
 *     cnt[x] = cnt[x] + 1;
 *   } else {
 *     break;
 *   }
 * }
 * Rd.W[x] = cnt[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_CLRS32(unsigned long a)
{
    /* %0 is Rd (one redundant-sign count per word); a is bound to Rs1. */
    unsigned long counts;

    __ASM volatile("clrs32 %0, %1"
                   : "=r"(counts)
                   : "r"(a));
    return counts;
}
/* ===== Inline Function End for 3.11. CLRS32 ===== */

/* ===== Inline Function Start for 3.12. CLO8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
 * \brief CLO8 (SIMD 8-bit Count Leading One)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * CLO8 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Count the number of leading one bits of the 8-bit elements of a general register.
 *
 * **Description**:\n
 * Starting from the most significant bits of the 8-bit elements of Rs1, this instruction
 * counts the number of leading one bits and writes the results to the corresponding 8-bit elements of
 * Rd.
 *
 * **Operations**:\n
 * ~~~
 * snum[x] = Rs1.B[x];
 * cnt[x] = 0;
 * for (i = 7 to 0) {
 *   if (snum[x](i) == 1) {
 *     cnt[x] = cnt[x] + 1;
 *   } else {
 *     break;
 *   }
 * }
 * Rd.B[x] = cnt[x];
 * for RV32: x=3...0
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_CLO8(unsigned long a)
{
    /* %0 is Rd (one leading-one count per byte); a is bound to Rs1. */
    unsigned long counts;

    __ASM volatile("clo8 %0, %1"
                   : "=r"(counts)
                   : "r"(a));
    return counts;
}
/* ===== Inline Function End for 3.12. CLO8 ===== */

/* ===== Inline Function Start for 3.13. CLO16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
 * \brief CLO16 (SIMD 16-bit Count Leading One)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * CLO16 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Count the number of leading one bits of the 16-bit elements of a general register.
 *
 * **Description**:\n
 * Starting from the most significant bits of the 16-bit elements of Rs1, this instruction
 * counts the number of leading one bits and writes the results to the corresponding 16-bit elements
 * of Rd.
 *
 * **Operations**:\n
 * ~~~
 * snum[x] = Rs1.H[x];
 * cnt[x] = 0;
 * for (i = 15 to 0) {
 *   if (snum[x](i) == 1) {
 *     cnt[x] = cnt[x] + 1;
 *   } else {
 *     break;
 *   }
 * }
 * Rd.H[x] = cnt[x];
 * for RV32: x=1...0
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_CLO16(unsigned long a)
{
    /* %0 is Rd (one leading-one count per halfword); a is bound to Rs1. */
    unsigned long counts;

    __ASM volatile("clo16 %0, %1"
                   : "=r"(counts)
                   : "r"(a));
    return counts;
}
/* ===== Inline Function End for 3.13. CLO16 ===== */

/* ===== Inline Function Start for 3.14. CLO32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
 * \brief CLO32 (SIMD 32-bit Count Leading One)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * CLO32 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Count the number of leading one bits of the 32-bit elements of a general register.
 *
 * **Description**:\n
 * Starting from the most significant bits of the 32-bit elements of Rs1, this instruction
 * counts the number of leading one bits and writes the results to the corresponding 32-bit elements
 * of Rd.
 *
 * **Operations**:\n
 * ~~~
 * snum[x] = Rs1.W[x];
 * cnt[x] = 0;
 * for (i = 31 to 0) {
 *   if (snum[x](i) == 1) {
 *     cnt[x] = cnt[x] + 1;
 *   } else {
 *     break;
 *   }
 * }
 * Rd.W[x] = cnt[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_CLO32(unsigned long a)
{
    /* %0 is Rd (one leading-one count per word); a is bound to Rs1. */
    unsigned long counts;

    __ASM volatile("clo32 %0, %1"
                   : "=r"(counts)
                   : "r"(a));
    return counts;
}
/* ===== Inline Function End for 3.14. CLO32 ===== */

/* ===== Inline Function Start for 3.15. CLZ8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
 * \brief CLZ8 (SIMD 8-bit Count Leading Zero)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * CLZ8 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Count the number of leading zero bits of the 8-bit elements of a general register.
 *
 * **Description**:\n
 * Starting from the most significant bits of the 8-bit elements of Rs1, this instruction
 * counts the number of leading zero bits and writes the results to the corresponding 8-bit elements of
 * Rd.
 *
 * **Operations**:\n
 * ~~~
 * snum[x] = Rs1.B[x];
 * cnt[x] = 0;
 * for (i = 7 to 0) {
 *   if (snum[x](i) == 0) {
 *     cnt[x] = cnt[x] + 1;
 *   } else {
 *     break;
 *   }
 * }
 * Rd.B[x] = cnt[x];
 * for RV32: x=3...0
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_CLZ8(unsigned long a)
{
    /* %0 is Rd (one leading-zero count per byte); a is bound to Rs1. */
    unsigned long counts;

    __ASM volatile("clz8 %0, %1"
                   : "=r"(counts)
                   : "r"(a));
    return counts;
}
/* ===== Inline Function End for 3.15. CLZ8 ===== */

/* ===== Inline Function Start for 3.16. CLZ16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
 * \brief CLZ16 (SIMD 16-bit Count Leading Zero)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * CLZ16 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Count the number of leading zero bits of the 16-bit elements of a general register.
 *
 * **Description**:\n
 * Starting from the most significant bits of the 16-bit elements of Rs1, this instruction
 * counts the number of leading zero bits and writes the results to the corresponding 16-bit elements
 * of Rd.
 *
 * **Operations**:\n
 * ~~~
 * snum[x] = Rs1.H[x];
 * cnt[x] = 0;
 * for (i = 15 to 0) {
 *   if (snum[x](i) == 0) {
 *     cnt[x] = cnt[x] + 1;
 *   } else {
 *     break;
 *   }
 * }
 * Rd.H[x] = cnt[x];
 * for RV32: x=1...0
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_CLZ16(unsigned long a)
{
    /* %0 is Rd (one leading-zero count per halfword); a is bound to Rs1. */
    unsigned long counts;

    __ASM volatile("clz16 %0, %1"
                   : "=r"(counts)
                   : "r"(a));
    return counts;
}
/* ===== Inline Function End for 3.16. CLZ16 ===== */

/* ===== Inline Function Start for 3.17. CLZ32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
 * \brief CLZ32 (SIMD 32-bit Count Leading Zero)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * CLZ32 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Count the number of leading zero bits of the 32-bit elements of a general register.
 *
 * **Description**:\n
 * Starting from the most significant bits of the 32-bit elements of Rs1, this instruction
 * counts the number of leading zero bits and writes the results to the corresponding 32-bit elements
 * of Rd.
 *
 * **Operations**:\n
 * ~~~
 * snum[x] = Rs1.W[x];
 * cnt[x] = 0;
 * for (i = 31 to 0) {
 *   if (snum[x](i) == 0) {
 *     cnt[x] = cnt[x] + 1;
 *   } else {
 *     break;
 *   }
 * }
 * Rd.W[x] = cnt[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_CLZ32(unsigned long a)
{
    /* %0 is Rd (one leading-zero count per word); a is bound to Rs1. */
    unsigned long counts;

    __ASM volatile("clz32 %0, %1"
                   : "=r"(counts)
                   : "r"(a));
    return counts;
}
/* ===== Inline Function End for 3.17. CLZ32 ===== */

/* ===== Inline Function Start for 3.18. CMPEQ8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP
 * \brief CMPEQ8 (SIMD 8-bit Integer Compare Equal)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * CMPEQ8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit integer elements equal comparisons simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 8-bit integer elements in Rs1 with the 8-bit integer
 * elements in Rs2 to see if they are equal. If they are equal, the result is 0xFF; otherwise, the result is
 * 0x0. The 8-bit element comparison results are written to Rd.
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned numbers.
 *
 * **Operations**:\n
 * ~~~
 * Rd.B[x] = (Rs1.B[x] == Rs2.B[x])? 0xff : 0x0;
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_CMPEQ8(unsigned long a, unsigned long b)
{
    /* %0 is Rd (per-byte equality mask); a and b are bound to Rs1 and Rs2. */
    unsigned long mask;

    __ASM volatile("cmpeq8 %0, %1, %2"
                   : "=r"(mask)
                   : "r"(a), "r"(b));
    return mask;
}
/* ===== Inline Function End for 3.18. CMPEQ8 ===== */

/* ===== Inline Function Start for 3.19. CMPEQ16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP
 * \brief CMPEQ16 (SIMD 16-bit Integer Compare Equal)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * CMPEQ16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit integer elements equal comparisons simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 16-bit integer elements in Rs1 with the 16-bit integer
 * elements in Rs2 to see if they are equal. If they are equal, the result is 0xFFFF; otherwise, the result
 * is 0x0. The 16-bit element comparison results are written to Rd.
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned numbers.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = (Rs1.H[x] == Rs2.H[x])? 0xffff : 0x0;
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_CMPEQ16(unsigned long a, unsigned long b)
{
    /* %0 is Rd (per-halfword equality mask); a and b are bound to Rs1 and Rs2. */
    unsigned long mask;

    __ASM volatile("cmpeq16 %0, %1, %2"
                   : "=r"(mask)
                   : "r"(a), "r"(b));
    return mask;
}
/* ===== Inline Function End for 3.19. CMPEQ16 ===== */

/* ===== Inline Function Start for 3.20. CRAS16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief CRAS16 (SIMD 16-bit Cross Addition & Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * CRAS16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit integer element addition and 16-bit integer element subtraction in a 32-bit
 * chunk simultaneously. Operands are from crossed positions in 32-bit chunks.
 *
 * **Description**:\n
 * This instruction adds the 16-bit integer element in [31:16] of 32-bit chunks in Rs1 with
 * the 16-bit integer element in [15:0] of 32-bit chunks in Rs2, and writes the result to [31:16] of 32-bit
 * chunks in Rd; at the same time, it subtracts the 16-bit integer element in [31:16] of 32-bit chunks in
 * Rs2 from the 16-bit integer element in [15:0] of 32-bit chunks in Rs1, and writes the result to [15:0] of 32-
 * bit chunks in Rd.
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned operations.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:16] = Rs1.W[x][31:16] + Rs2.W[x][15:0];
 * Rd.W[x][15:0] = Rs1.W[x][15:0] - Rs2.W[x][31:16];
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_CRAS16(unsigned long a, unsigned long b)
{
    /* %0 is Rd (cross add/subtract results); a and b are bound to Rs1 and Rs2. */
    unsigned long rd;

    __ASM volatile("cras16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.20. CRAS16 ===== */

/* ===== Inline Function Start for 3.21. CRSA16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief CRSA16 (SIMD 16-bit Cross Subtraction & Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * CRSA16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit integer element subtraction and 16-bit integer element addition in a 32-bit
 * chunk simultaneously. Operands are from crossed positions in 32-bit chunks.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit integer element in [15:0] of 32-bit chunks in Rs2
 * from the 16-bit integer element in [31:16] of 32-bit chunks in Rs1, and writes the result to [31:16] of
 * 32-bit chunks in Rd; at the same time, it adds the 16-bit integer element in [31:16] of 32-bit chunks
 * in Rs2 with the 16-bit integer element in [15:0] of 32-bit chunks in Rs1, and writes the result to
 * [15:0] of 32-bit chunks in Rd.
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned operations.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:16] = Rs1.W[x][31:16] - Rs2.W[x][15:0];
 * Rd.W[x][15:0] = Rs1.W[x][15:0] + Rs2.W[x][31:16];
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_CRSA16(unsigned long a, unsigned long b)
{
    /* %0 is Rd (cross subtract/add results); a and b are bound to Rs1 and Rs2. */
    unsigned long rd;

    __ASM volatile("crsa16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.21. CRSA16 ===== */

/* ===== Inline Function Start for 3.22. INSB ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
 * \brief INSB (Insert Byte)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * (RV32) INSB Rd, Rs1, imm[1:0]
 * (RV64) INSB Rd, Rs1, imm[2:0]
 * ~~~
 *
 * **Purpose**:\n
 * Insert byte 0 of a 32-bit or 64-bit register into one of the byte elements of another register.
 *
 * **Description**:\n
 * This instruction inserts byte 0 of Rs1 into byte `imm[1:0]` (RV32) or `imm[2:0]` (RV64)
 * of Rd.
 *
 * **Operations**:\n
 * ~~~
 * bpos = imm[1:0]; (RV32)
 * bpos = imm[2:0]; (RV64)
 * Rd.B[bpos] = Rs1.B[0]
 * ~~~
 *
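 * \param [in]  t    unsigned long value used as the initial content of Rd; one of its bytes is replaced
 * \param [in]  a    unsigned long value whose byte 0 is inserted (Rs1)
 * \param [in]  b    immediate byte position (imm[1:0] on RV32, imm[2:0] on RV64); must be a compile-time constant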
 * \return value stored in unsigned long type
 */
#define __RV_INSB(t, a, b)    \
    ({    \
        unsigned long __t = (unsigned long)(t);    \
        unsigned long __a = (unsigned long)(a);    \
        __ASM volatile("insb %0, %1, %2" : "+r"(__t) : "r"(__a), "K"(b));    \
        __t;    \
    })
/* ===== Inline Function End for 3.22. INSB ===== */

/* ===== Inline Function Start for 3.23. KABS8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
 * \brief KABS8 (SIMD 8-bit Saturating Absolute)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KABS8 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Get the absolute value of 8-bit signed integer elements simultaneously.
 *
 * **Description**:\n
 * This instruction calculates the absolute value of 8-bit signed integer elements stored
 * in Rs1 and writes the element results to Rd. If the input number is 0x80, this instruction generates
 * 0x7f as the output and sets the OV bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * src = Rs1.B[x];
 * if (src == 0x80) {
 *   src = 0x7f;
 *   OV = 1;
 * } else if (src[7] == 1) {
 *   src = -src;
 * }
 * Rd.B[x] = src;
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KABS8(unsigned long a)
{
    /* Receives the destination register (%0) written by the instruction. */
    unsigned long rd;

    /* a is the only source operand (%1). */
    __ASM volatile("kabs8 %0, %1"
                   : "=r"(rd)
                   : "r"(a));
    return rd;
}
/* ===== Inline Function End for 3.23. KABS8 ===== */

/* ===== Inline Function Start for 3.24. KABS16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
 * \brief KABS16 (SIMD 16-bit Saturating Absolute)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KABS16 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Get the absolute value of 16-bit signed integer elements simultaneously.
 *
 * **Description**:\n
 * This instruction calculates the absolute value of 16-bit signed integer elements stored
 * in Rs1 and writes the element results to Rd. If the input number is 0x8000, this instruction
 * generates 0x7fff as the output and sets the OV bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * src = Rs1.H[x];
 * if (src == 0x8000) {
 *   src = 0x7fff;
 *   OV = 1;
 * } else if (src[15] == 1) {
 *   src = -src;
 * }
 * Rd.H[x] = src;
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KABS16(unsigned long a)
{
    /* Receives the destination register (%0) written by the instruction. */
    unsigned long rd;

    /* a is the only source operand (%1). */
    __ASM volatile("kabs16 %0, %1"
                   : "=r"(rd)
                   : "r"(a));
    return rd;
}
/* ===== Inline Function End for 3.24. KABS16 ===== */

/* ===== Inline Function Start for 3.25. KABSW ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
 * \brief KABSW (Scalar 32-bit Absolute Value with Saturation)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * KABSW Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Get the absolute value of a signed 32-bit integer in a general register.
 *
 * **Description**:\n
 * This instruction calculates the absolute value of a signed 32-bit integer stored in Rs1.
 * The result is sign-extended (for RV64) and written to Rd. This instruction with the minimum
 * negative integer input of 0x80000000 will produce a saturated output of maximum positive integer
 * of 0x7fffffff and the OV flag will be set to 1.
 *
 * **Operations**:\n
 * ~~~
 * if (Rs1.W[0] >= 0) {
 *   res = Rs1.W[0];
 * } else {
 *   if (Rs1.W[0] == 0x80000000) {
 *     res = 0x7fffffff;
 *     OV = 1;
 *   } else {
 *     res = -Rs1.W[0];
 *   }
 * }
 * Rd = SE32(res);
 * ~~~
 *
 * \param [in]  a    signed long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KABSW(signed long a)
{
    /* Receives the destination register (%0) written by the instruction. */
    unsigned long rd;

    /* The signed input a is the only source operand (%1). */
    __ASM volatile("kabsw %0, %1"
                   : "=r"(rd)
                   : "r"(a));
    return rd;
}
/* ===== Inline Function End for 3.25. KABSW ===== */

/* ===== Inline Function Start for 3.26. KADD8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
 * \brief KADD8 (SIMD 8-bit Signed Saturating Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KADD8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit signed integer element saturating additions simultaneously.
 *
 * **Description**:\n
 * This instruction adds the 8-bit signed integer elements in Rs1 with the 8-bit signed
 * integer elements in Rs2. If any of the results are beyond the Q7 number range (-2^7 <= Q7 <= 2^7-1), they
 * are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rs1.B[x] + Rs2.B[x];
 * if (res[x] > 127) {
 *   res[x] = 127;
 *   OV = 1;
 * } else if (res[x] < -128) {
 *   res[x] = -128;
 *   OV = 1;
 * }
 * Rd.B[x] = res[x];
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KADD8(unsigned long a, unsigned long b)
{
    /* Receives the destination register (%0) written by the instruction. */
    unsigned long rd;

    /* a is bound to %1 and b to %2, both as register inputs. */
    __ASM volatile("kadd8 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.26. KADD8 ===== */

/* ===== Inline Function Start for 3.27. KADD16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief KADD16 (SIMD 16-bit Signed Saturating Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KADD16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element saturating additions simultaneously.
 *
 * **Description**:\n
 * This instruction adds the 16-bit signed integer elements in Rs1 with the 16-bit signed
 * integer elements in Rs2. If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= 2^15-1),
 * they are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rs1.H[x] + Rs2.H[x];
 * if (res[x] > 32767) {
 *   res[x] = 32767;
 *   OV = 1;
 * } else if (res[x] < -32768) {
 *   res[x] = -32768;
 *   OV = 1;
 * }
 * Rd.H[x] = res[x];
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KADD16(unsigned long a, unsigned long b)
{
    /* Receives the destination register (%0) written by the instruction. */
    unsigned long rd;

    /* a is bound to %1 and b to %2, both as register inputs. */
    __ASM volatile("kadd16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.27. KADD16 ===== */

/* ===== Inline Function Start for 3.28. KADD64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
 * \brief KADD64 (64-bit Signed Saturating Addition)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * KADD64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Add two 64-bit signed integers. The result is saturated to the Q63 range.
 *
 * **RV32 Description**:\n
 * This instruction adds the 64-bit signed integer of an even/odd pair of registers
 * specified by Rs1(4,1) with the 64-bit signed integer of an even/odd pair of registers specified by
 * Rs2(4,1). If the 64-bit result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the
 * range and the OV bit is set to 1. The saturated result is written to an even/odd pair of registers
 * specified by Rd(4,1).
 * Rx(4,1), i.e., value d, determines the even/odd pair group of two registers. Specifically, the register
 * pair includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
 * of the pair contains the low 32-bit of the result.
 *
 * **RV64 Description**:\n
 * This instruction adds the 64-bit signed integer in Rs1 with the 64-bit signed
 * integer in Rs2. If the result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the
 * range and the OV bit is set to 1. The saturated result is written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * RV32:
 *  t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 *  a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
 *  b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
 *  result = R[a_H].R[a_L] + R[b_H].R[b_L];
 *  if (result > (2^63)-1) {
 *    result = (2^63)-1; OV = 1;
 *  } else if (result < -2^63) {
 *    result = -2^63; OV = 1;
 *  }
 *  R[t_H].R[t_L] = result;
 * RV64:
 *  result = Rs1 + Rs2;
 *  if (result > (2^63)-1) {
 *    result = (2^63)-1; OV = 1;
 *  } else if (result < -2^63) {
 *    result = -2^63; OV = 1;
 *  }
 *  Rd = result;
 * ~~~
 *
 * \param [in]  a    long long type of value stored in a
 * \param [in]  b    long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_KADD64(long long a, long long b)
{
    /* Receives the 64-bit destination operand (%0) written by the instruction. */
    long long sum;

    /* a is bound to %1 and b to %2; all three operands are long long. */
    __ASM volatile("kadd64 %0, %1, %2"
                   : "=r"(sum)
                   : "r"(a), "r"(b));
    return sum;
}
/* ===== Inline Function End for 3.28. KADD64 ===== */

/* ===== Inline Function Start for 3.29. KADDH ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
 * \brief KADDH (Signed Addition with Q15 Saturation)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * KADDH Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Add the signed lower 32-bit content of two registers with Q15 saturation.
 *
 * **Description**:\n
 * The signed lower 32-bit content of Rs1 is added with the signed lower 32-bit content of
 * Rs2. And the result is saturated to the 16-bit signed integer range of [-2^15, 2^15-1] and then sign-
 * extended and written to Rd. If saturation happens, this instruction sets the OV flag.
 *
 * **Operations**:\n
 * ~~~
 * tmp = Rs1.W[0] + Rs2.W[0];
 * if (tmp > 32767) {
 *   res = 32767;
 *   OV = 1;
 * } else if (tmp < -32768) {
 *   res = -32768;
 *   OV = 1;
 * } else {
 *   res = tmp;
 * }
 * Rd = SE(res[15:0]);
 * ~~~
 *
 * \param [in]  a    int type of value stored in a
 * \param [in]  b    int type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KADDH(int a, int b)
{
    /* Receives the destination register (%0) written by the instruction. */
    long rd;

    /* a is bound to %1 and b to %2, both as register inputs. */
    __ASM volatile("kaddh %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.29. KADDH ===== */

/* ===== Inline Function Start for 3.30. KADDW ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
 * \brief KADDW (Signed Addition with Q31 Saturation)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * KADDW Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Add the lower 32-bit signed content of two registers with Q31 saturation.
 *
 * **Description**:\n
 * The lower 32-bit signed content of Rs1 is added with the lower 32-bit signed content of
 * Rs2. And the result is saturated to the 32-bit signed integer range of [-2^31, 2^31-1] and then sign-
 * extended and written to Rd. If saturation happens, this instruction sets the OV flag.
 *
 * **Operations**:\n
 * ~~~
 * tmp = Rs1.W[0] + Rs2.W[0];
 * if (tmp > (2^31)-1) {
 *   res = (2^31)-1;
 *   OV = 1;
 * } else if (tmp < -2^31) {
 *   res = -2^31;
 *   OV = 1;
 * } else {
 *   res = tmp;
 * }
 * Rd = res[31:0]; // RV32
 * Rd = SE(res[31:0]); // RV64
 * ~~~
 *
 * \param [in]  a    int type of value stored in a
 * \param [in]  b    int type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KADDW(int a, int b)
{
    /* Receives the destination register (%0) written by the instruction. */
    long rd;

    /* a is bound to %1 and b to %2, both as register inputs. */
    __ASM volatile("kaddw %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.30. KADDW ===== */

/* ===== Inline Function Start for 3.31. KCRAS16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief KCRAS16 (SIMD 16-bit Signed Saturating Cross Addition & Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KCRAS16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element saturating addition and 16-bit signed integer element
 * saturating subtraction in a 32-bit chunk simultaneously. Operands are from crossed positions in 32-
 * bit chunks.
 *
 * **Description**:\n
 * This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in
 * Rs1 with the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs2; at the same time, it
 * subtracts the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs2 from the 16-bit signed
 * integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the Q15 number
 * range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated
 * results are written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit chunks in Rd for
 * subtraction.
 *
 * **Operations**:\n
 * ~~~
 * res1 = Rs1.W[x][31:16] + Rs2.W[x][15:0];
 * res2 = Rs1.W[x][15:0] - Rs2.W[x][31:16];
 * for (res in [res1, res2]) {
 *   if (res > (2^15)-1) {
 *     res = (2^15)-1;
 *     OV = 1;
 *   } else if (res < -2^15) {
 *     res = -2^15;
 *     OV = 1;
 *   }
 * }
 * Rd.W[x][31:16] = res1;
 * Rd.W[x][15:0] = res2;
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KCRAS16(unsigned long a, unsigned long b)
{
    /* Receives the destination register (%0) written by the instruction. */
    unsigned long rd;

    /* a is bound to %1 and b to %2, both as register inputs. */
    __ASM volatile("kcras16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.31. KCRAS16 ===== */

/* ===== Inline Function Start for 3.32. KCRSA16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief KCRSA16 (SIMD 16-bit Signed Saturating Cross Subtraction & Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KCRSA16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element saturating subtraction and 16-bit signed integer element
 * saturating addition in a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit
 * chunks.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit signed integer element in [15:0] of 32-bit chunks
 * in Rs2 from the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1; at the same time, it
 * adds the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs2 with the 16-bit signed
 * integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the Q15 number
 * range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated
 * results are written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of 32-bit chunks in Rd
 * for addition.
 *
 * **Operations**:\n
 * ~~~
 * res1 = Rs1.W[x][31:16] - Rs2.W[x][15:0];
 * res2 = Rs1.W[x][15:0] + Rs2.W[x][31:16];
 * for (res in [res1, res2]) {
 *   if (res > (2^15)-1) {
 *     res = (2^15)-1;
 *     OV = 1;
 *   } else if (res < -2^15) {
 *     res = -2^15;
 *     OV = 1;
 *   }
 * }
 * Rd.W[x][31:16] = res1;
 * Rd.W[x][15:0] = res2;
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KCRSA16(unsigned long a, unsigned long b)
{
    /* Receives the destination register (%0) written by the instruction. */
    unsigned long rd;

    /* a is bound to %1 and b to %2, both as register inputs. */
    __ASM volatile("kcrsa16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.32. KCRSA16 ===== */

/* ===== Inline Function Start for 3.33.1. KDMBB ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
 * \brief KDMBB (Signed Saturating Double Multiply B16 x B16)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * KDMxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
 * of the lower 32-bit chunk in registers and then double and saturate the Q31 result. The result is
 * written into the destination register for RV32 or sign-extended to 64-bits and written into the
 * destination register for RV64. If saturation happens, an overflow flag OV will be set.
 *
 * **Description**:\n
 * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
 * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then
 * doubled and saturated into a Q31 value. The Q31 value is then written into Rd (sign-extended in
 * RV64). When both the two Q15 inputs are 0x8000, saturation will happen. The result will be
 * saturated to 0x7FFFFFFF and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMBB
 * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMBT
 * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMTT
 * if (0x8000 != aop | 0x8000 != bop) {
 *   Mresult = aop * bop;
 *   resQ31 = Mresult << 1;
 *   Rd = resQ31; // RV32
 *   Rd = SE(resQ31); // RV64
 * } else {
 *   resQ31 = 0x7FFFFFFF;
 *   Rd = resQ31; // RV32
 *   Rd = SE(resQ31); // RV64
 *   OV = 1;
 * }
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KDMBB(unsigned int a, unsigned int b)
{
    /* Receives the destination register (%0) written by the instruction. */
    long rd;

    /* a is bound to %1 and b to %2, both as register inputs. */
    __ASM volatile("kdmbb %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.33.1. KDMBB ===== */

/* ===== Inline Function Start for 3.33.2. KDMBT ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
 * \brief KDMBT (Signed Saturating Double Multiply B16 x T16)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * KDMxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
 * of the lower 32-bit chunk in registers and then double and saturate the Q31 result. The result is
 * written into the destination register for RV32 or sign-extended to 64-bits and written into the
 * destination register for RV64. If saturation happens, an overflow flag OV will be set.
 *
 * **Description**:\n
 * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
 * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then
 * doubled and saturated into a Q31 value. The Q31 value is then written into Rd (sign-extended in
 * RV64). When both the two Q15 inputs are 0x8000, saturation will happen. The result will be
 * saturated to 0x7FFFFFFF and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMBB
 * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMBT
 * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMTT
 * if (0x8000 != aop | 0x8000 != bop) {
 *   Mresult = aop * bop;
 *   resQ31 = Mresult << 1;
 *   Rd = resQ31; // RV32
 *   Rd = SE(resQ31); // RV64
 * } else {
 *   resQ31 = 0x7FFFFFFF;
 *   Rd = resQ31; // RV32
 *   Rd = SE(resQ31); // RV64
 *   OV = 1;
 * }
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KDMBT(unsigned int a, unsigned int b)
{
    /* Receives the destination register (%0) written by the instruction. */
    long rd;

    /* a is bound to %1 and b to %2, both as register inputs. */
    __ASM volatile("kdmbt %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.33.2. KDMBT ===== */

/* ===== Inline Function Start for 3.33.3. KDMTT ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
 * \brief KDMTT (Signed Saturating Double Multiply T16 x T16)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * KDMxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
 * of the lower 32-bit chunk in registers and then double and saturate the Q31 result. The result is
 * written into the destination register for RV32 or sign-extended to 64-bits and written into the
 * destination register for RV64. If saturation happens, an overflow flag OV will be set.
 *
 * **Description**:\n
 * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
 * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then
 * doubled and saturated into a Q31 value. The Q31 value is then written into Rd (sign-extended in
 * RV64). When both the two Q15 inputs are 0x8000, saturation will happen. The result will be
 * saturated to 0x7FFFFFFF and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMBB
 * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMBT
 * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMTT
 * if (0x8000 != aop | 0x8000 != bop) {
 *   Mresult = aop * bop;
 *   resQ31 = Mresult << 1;
 *   Rd = resQ31; // RV32
 *   Rd = SE(resQ31); // RV64
 * } else {
 *   resQ31 = 0x7FFFFFFF;
 *   Rd = resQ31; // RV32
 *   Rd = SE(resQ31); // RV64
 *   OV = 1;
 * }
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KDMTT(unsigned int a, unsigned int b)
{
    /* Receives the destination register (%0) written by the instruction. */
    long rd;

    /* a is bound to %1 and b to %2, both as register inputs. */
    __ASM volatile("kdmtt %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.33.3. KDMTT ===== */

/* ===== Inline Function Start for 3.34.1. KDMABB ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
 * \brief KDMABB (Signed Saturating Double Multiply Addition B16 x B16)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * KDMAxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
 * of the lower 32-bit chunk in registers and then double and saturate the Q31 result, add the result
 * with the sign-extended lower 32-bit chunk destination register and write the saturated addition
 * result into the destination register. If saturation happens, an overflow flag OV will be set.
 *
 * **Description**:\n
 * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
 * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then
 * doubled and saturated into a Q31 value. The Q31 value is then added with the content of Rd. If the
 * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and
 * the OV flag is set to 1. The result after saturation is written to Rd.
 * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be
 * set.
 *
 * **Operations**:\n
 * ~~~
 * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMABB
 * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMABT
 * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMATT
 * if (0x8000 != aop | 0x8000 != bop) {
 *   Mresult = aop * bop;
 *   resQ31 = Mresult << 1;
 * } else {
 *   resQ31 = 0x7FFFFFFF;
 *   OV = 1;
 * }
 * resadd = Rd + resQ31; // RV32
 * resadd = Rd.W[0] + resQ31; // RV64
 * if (resadd > (2^31)-1) {
 *   resadd = (2^31)-1;
 *   OV = 1;
 * } else if (resadd < -2^31) {
 *   resadd = -2^31;
 *   OV = 1;
 * }
 * Rd = resadd; // RV32
 * Rd = SE(resadd); // RV64
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KDMABB(long t, unsigned int a, unsigned int b)
{
    /* Start from the caller's t; the instruction both reads and overwrites it. */
    long acc = t;

    /* acc is the read-write operand %0 ("+r"); a is %1 and b is %2. */
    __ASM volatile("kdmabb %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.34.1. KDMABB ===== */

/* ===== Inline Function Start for 3.34.2. KDMABT ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
 * \brief KDMABT (Signed Saturating Double Multiply Addition B16 x T16)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * KDMAxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
 * of the lower 32-bit chunk in registers and then double and saturate the Q31 result, add the result
 * with the sign-extended lower 32-bit chunk destination register and write the saturated addition
 * result into the destination register. If saturation happens, an overflow flag OV will be set.
 *
 * **Description**:\n
 * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
 * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then
 * doubled and saturated into a Q31 value. The Q31 value is then added with the content of Rd. If the
 * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and
 * the OV flag is set to 1. The result after saturation is written to Rd.
 * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be
 * set.
 *
 * **Operations**:\n
 * ~~~
 * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMABB
 * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMABT
 * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMATT
 * if (0x8000 != aop | 0x8000 != bop) {
 *   Mresult = aop * bop;
 *   resQ31 = Mresult << 1;
 * } else {
 *   resQ31 = 0x7FFFFFFF;
 *   OV = 1;
 * }
 * resadd = Rd + resQ31; // RV32
 * resadd = Rd.W[0] + resQ31; // RV64
 * if (resadd > (2^31)-1) {
 *   resadd = (2^31)-1;
 *   OV = 1;
 * } else if (resadd < -2^31) {
 *   resadd = -2^31;
 *   OV = 1;
 * }
 * Rd = resadd; // RV32
 * Rd = SE(resadd); // RV64
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KDMABT(long t, unsigned int a, unsigned int b)
{
    /* Start from the caller's t; the instruction both reads and overwrites it. */
    long acc = t;

    /* acc is the read-write operand %0 ("+r"); a is %1 and b is %2. */
    __ASM volatile("kdmabt %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.34.2. KDMABT ===== */

/* ===== Inline Function Start for 3.34.3. KDMATT ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
 * \brief KDMATT (Signed Saturating Double Multiply Addition T16 x T16)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * KDMAxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
 * of the lower 32-bit chunk in registers and then double and saturate the Q31 result, add the result
 * with the sign-extended lower 32-bit chunk destination register and write the saturated addition
 * result into the destination register. If saturation happens, an overflow flag OV will be set.
 *
 * **Description**:\n
 * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
 * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then
 * doubled and saturated into a Q31 value. The Q31 value is then added with the content of Rd. If the
 * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and
 * the OV flag is set to 1. The result after saturation is written to Rd.
 * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be
 * set.
 *
 * **Operations**:\n
 * ~~~
 * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMABB
 * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMABT
 * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMATT
 * if (0x8000 != aop | 0x8000 != bop) {
 *   Mresult = aop * bop;
 *   resQ31 = Mresult << 1;
 * } else {
 *   resQ31 = 0x7FFFFFFF;
 *   OV = 1;
 * }
 * resadd = Rd + resQ31; // RV32
 * resadd = Rd.W[0] + resQ31; // RV64
 * if (resadd > (2^31)-1) {
 *   resadd = (2^31)-1;
 *   OV = 1;
 * } else if (resadd < -2^31) {
 *   resadd = -2^31;
 *   OV = 1;
 * }
 * Rd = resadd; // RV32
 * Rd = SE(resadd); // RV64
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KDMATT(long t, unsigned int a, unsigned int b)
{
    /* Start from the caller's t; the instruction both reads and overwrites it. */
    long acc = t;

    /* acc is the read-write operand %0 ("+r"); a is %1 and b is %2. */
    __ASM volatile("kdmatt %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.34.3. KDMATT ===== */

/* ===== Inline Function Start for 3.35.1. KHM8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY
 * \brief KHM8 (SIMD Signed Saturating Q7 Multiply)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KHM8 Rd, Rs1, Rs2
 * KHMX8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do Q7xQ7 element multiplications simultaneously. The Q14 results are then reduced to Q7
 * numbers again.
 *
 * **Description**:\n
 * For the `KHM8` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1
 * with the top 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7
 * content of 16-bit chunks in Rs1 with the bottom 8-bit Q7 content of 16-bit chunks in Rs2.
 * For the `KHMX8` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1 with the
 * bottom 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7
 * content of 16-bit chunks in Rs1 with the top 8-bit Q7 content of 16-bit chunks in Rs2.
 * The Q14 results are then right-shifted 7-bits and saturated into Q7 values. The Q7 results are then
 * written into Rd. When both the two Q7 inputs of a multiplication are 0x80, saturation will happen.
 * The result will be saturated to 0x7F and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * if (is `KHM8`) {
 *   op1t = Rs1.B[x+1]; op2t = Rs2.B[x+1]; // top
 *   op1b = Rs1.B[x]; op2b = Rs2.B[x]; // bottom
 * } else if (is `KHMX8`) {
 *   op1t = Rs1.B[x+1]; op2t = Rs2.B[x]; // Rs1 top
 *   op1b = Rs1.B[x]; op2b = Rs2.B[x+1]; // Rs1 bottom
 * }
 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
 *   if (0x80 != aop | 0x80 != bop) {
 *     res = (aop s* bop) >> 7;
 *   } else {
 *     res= 0x7F;
 *     OV = 1;
 *   }
 * }
 * Rd.H[x/2] = concat(rest, resb);
 * for RV32, x=0,2
 * for RV64, x=0,2,4,6
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KHM8(unsigned long a, unsigned long b)
{
    /* Receives the destination register (%0) written by the instruction. */
    unsigned long rd;

    /* a is bound to %1 and b to %2, both as register inputs. */
    __ASM volatile("khm8 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.35.1. KHM8 ===== */

/* ===== Inline Function Start for 3.35.2. KHMX8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY
 * \brief KHMX8 (SIMD Signed Saturating Crossed Q7 Multiply)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KHM8 Rd, Rs1, Rs2
 * KHMX8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do Q7xQ7 element multiplications simultaneously. The Q14 results are then reduced to Q7
 * numbers again.
 *
 * **Description**:\n
 * For the `KHM8` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1
 * with the top 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7
 * content of 16-bit chunks in Rs1 with the bottom 8-bit Q7 content of 16-bit chunks in Rs2.
 * For the `KHMX8` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1 with the
 * bottom 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7
 * content of 16-bit chunks in Rs1 with the top 8-bit Q7 content of 16-bit chunks in Rs2.
 * The Q14 results are then right-shifted 7-bits and saturated into Q7 values. The Q7 results are then
 * written into Rd. When both the two Q7 inputs of a multiplication are 0x80, saturation will happen.
 * The result will be saturated to 0x7F and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * if (is `KHM8`) {
 *   op1t = Rs1.B[x+1]; op2t = Rs2.B[x+1]; // top
 *   op1b = Rs1.B[x]; op2b = Rs2.B[x]; // bottom
 * } else if (is `KHMX8`) {
 *   op1t = Rs1.B[x+1]; op2t = Rs2.B[x]; // Rs1 top
 *   op1b = Rs1.B[x]; op2b = Rs2.B[x+1]; // Rs1 bottom
 * }
 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
 *   if (0x80 != aop | 0x80 != bop) {
 *     res = (aop s* bop) >> 7;
 *   } else {
 *     res= 0x7F;
 *     OV = 1;
 *   }
 * }
 * Rd.H[x/2] = concat(rest, resb);
 * for RV32, x=0,2
 * for RV64, x=0,2,4,6
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KHMX8(unsigned long a, unsigned long b)
{
    /* Holds Rd: the packed results of the crossed multiply. */
    unsigned long rd;

    /* Operand mapping: %0 = Rd (output), %1 = Rs1 (a), %2 = Rs2 (b). */
    __ASM volatile("khmx8 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.35.2. KHMX8 ===== */

/* ===== Inline Function Start for 3.36.1. KHM16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY
 * \brief KHM16 (SIMD Signed Saturating Q15 Multiply)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KHM16 Rd, Rs1, Rs2
 * KHMX16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do Q15xQ15 element multiplications simultaneously. The Q30 results are then reduced to
 * Q15 numbers again.
 *
 * **Description**:\n
 * For the `KHM16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in
 * Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom
 * 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content of 32-bit chunks in
 * Rs2.
 * For the `KHMX16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the
 * bottom 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom 16-bit Q15
 * content of 32-bit chunks in Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2.
 * The Q30 results are then right-shifted 15-bits and saturated into Q15 values. The Q15 results are
 * then written into Rd. When both the two Q15 inputs of a multiplication are 0x8000, saturation will
 * happen. The result will be saturated to 0x7FFF and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * if (is `KHM16`) {
 *   op1t = Rs1.H[x+1]; op2t = Rs2.H[x+1]; // top
 *   op1b = Rs1.H[x]; op2b = Rs2.H[x]; // bottom
 * } else if (is `KHMX16`) {
 *   op1t = Rs1.H[x+1]; op2t = Rs2.H[x]; // Rs1 top
 *   op1b = Rs1.H[x]; op2b = Rs2.H[x+1]; // Rs1 bottom
 * }
 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
 *   if (0x8000 != aop | 0x8000 != bop) {
 *     res = (aop s* bop) >> 15;
 *   } else {
 *     res= 0x7FFF;
 *     OV = 1;
 *   }
 * }
 * Rd.W[x/2] = concat(rest, resb);
 * for RV32: x=0
 * for RV64: x=0,2
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KHM16(unsigned long a, unsigned long b)
{
    /* Holds Rd: the packed results written by the instruction. */
    unsigned long rd;

    /* Operand mapping: %0 = Rd (output), %1 = Rs1 (a), %2 = Rs2 (b). */
    __ASM volatile("khm16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.36.1. KHM16 ===== */

/* ===== Inline Function Start for 3.36.2. KHMX16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY
 * \brief KHMX16 (SIMD Signed Saturating Crossed Q15 Multiply)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KHM16 Rd, Rs1, Rs2
 * KHMX16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do Q15xQ15 element multiplications simultaneously. The Q30 results are then reduced to
 * Q15 numbers again.
 *
 * **Description**:\n
 * For the `KHM16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in
 * Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom
 * 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content of 32-bit chunks in
 * Rs2.
 * For the `KHMX16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the
 * bottom 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom 16-bit Q15
 * content of 32-bit chunks in Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2.
 * The Q30 results are then right-shifted 15-bits and saturated into Q15 values. The Q15 results are
 * then written into Rd. When both the two Q15 inputs of a multiplication are 0x8000, saturation will
 * happen. The result will be saturated to 0x7FFF and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * if (is `KHM16`) {
 *   op1t = Rs1.H[x+1]; op2t = Rs2.H[x+1]; // top
 *   op1b = Rs1.H[x]; op2b = Rs2.H[x]; // bottom
 * } else if (is `KHMX16`) {
 *   op1t = Rs1.H[x+1]; op2t = Rs2.H[x]; // Rs1 top
 *   op1b = Rs1.H[x]; op2b = Rs2.H[x+1]; // Rs1 bottom
 * }
 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
 *   if (0x8000 != aop | 0x8000 != bop) {
 *     res = (aop s* bop) >> 15;
 *   } else {
 *     res= 0x7FFF;
 *     OV = 1;
 *   }
 * }
 * Rd.W[x/2] = concat(rest, resb);
 * for RV32: x=0
 * for RV64: x=0,2
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KHMX16(unsigned long a, unsigned long b)
{
    /* Holds Rd: the packed results of the crossed multiply. */
    unsigned long rd;

    /* Operand mapping: %0 = Rd (output), %1 = Rs1 (a), %2 = Rs2 (b). */
    __ASM volatile("khmx16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.36.2. KHMX16 ===== */

/* ===== Inline Function Start for 3.37.1. KHMBB ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
 * \brief KHMBB (Signed Saturating Half Multiply B16 x B16)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * KHMxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed Q15 number contents of two 16-bit data in the corresponding portion
 * of the lower 32-bit chunk in registers and then right-shift 15 bits to turn the Q30 result into a Q15
 * number again and saturate the Q15 result into the destination register. If saturation happens, an
 * overflow flag OV will be set.
 *
 * **Description**:\n
 * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
 * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then right-
 * shifted 15-bits and saturated into a Q15 value. The Q15 value is then sign-extended and written into
 * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated
 * to 0x7FFF and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * aop = Rs1.H[0]; bop = Rs2.H[0]; // KHMBB
 * aop = Rs1.H[0]; bop = Rs2.H[1]; // KHMBT
 * aop = Rs1.H[1]; bop = Rs2.H[1]; // KHMTT
 * If (0x8000 != aop | 0x8000 != bop) {
 *   Mresult[31:0] = aop * bop;
 *   res[15:0] = Mresult[30:15];
 * } else {
 *   res[15:0] = 0x7FFF;
 *   OV = 1;
 * }
 * Rd = SE32(res[15:0]); // RV32
 * Rd = SE64(res[15:0]); // RV64
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KHMBB(unsigned int a, unsigned int b)
{
    /* Holds Rd, returned to the caller as a signed long. */
    long rd;

    /* Operand mapping: %0 = Rd (output), %1 = Rs1 (a), %2 = Rs2 (b). */
    __ASM volatile("khmbb %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.37.1. KHMBB ===== */

/* ===== Inline Function Start for 3.37.2. KHMBT ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
 * \brief KHMBT (Signed Saturating Half Multiply B16 x T16)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * KHMxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed Q15 number contents of two 16-bit data in the corresponding portion
 * of the lower 32-bit chunk in registers and then right-shift 15 bits to turn the Q30 result into a Q15
 * number again and saturate the Q15 result into the destination register. If saturation happens, an
 * overflow flag OV will be set.
 *
 * **Description**:\n
 * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
 * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then right-
 * shifted 15-bits and saturated into a Q15 value. The Q15 value is then sign-extended and written into
 * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated
 * to 0x7FFF and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * aop = Rs1.H[0]; bop = Rs2.H[0]; // KHMBB
 * aop = Rs1.H[0]; bop = Rs2.H[1]; // KHMBT
 * aop = Rs1.H[1]; bop = Rs2.H[1]; // KHMTT
 * If (0x8000 != aop | 0x8000 != bop) {
 *   Mresult[31:0] = aop * bop;
 *   res[15:0] = Mresult[30:15];
 * } else {
 *   res[15:0] = 0x7FFF;
 *   OV = 1;
 * }
 * Rd = SE32(res[15:0]); // RV32
 * Rd = SE64(res[15:0]); // RV64
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KHMBT(unsigned int a, unsigned int b)
{
    /* Holds Rd, returned to the caller as a signed long. */
    long rd;

    /* Operand mapping: %0 = Rd (output), %1 = Rs1 (a), %2 = Rs2 (b). */
    __ASM volatile("khmbt %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.37.2. KHMBT ===== */

/* ===== Inline Function Start for 3.37.3. KHMTT ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
 * \brief KHMTT (Signed Saturating Half Multiply T16 x T16)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * KHMxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed Q15 number contents of two 16-bit data in the corresponding portion
 * of the lower 32-bit chunk in registers and then right-shift 15 bits to turn the Q30 result into a Q15
 * number again and saturate the Q15 result into the destination register. If saturation happens, an
 * overflow flag OV will be set.
 *
 * **Description**:\n
 * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
 * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then right-
 * shifted 15-bits and saturated into a Q15 value. The Q15 value is then sign-extended and written into
 * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated
 * to 0x7FFF and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * aop = Rs1.H[0]; bop = Rs2.H[0]; // KHMBB
 * aop = Rs1.H[0]; bop = Rs2.H[1]; // KHMBT
 * aop = Rs1.H[1]; bop = Rs2.H[1]; // KHMTT
 * If (0x8000 != aop | 0x8000 != bop) {
 *   Mresult[31:0] = aop * bop;
 *   res[15:0] = Mresult[30:15];
 * } else {
 *   res[15:0] = 0x7FFF;
 *   OV = 1;
 * }
 * Rd = SE32(res[15:0]); // RV32
 * Rd = SE64(res[15:0]); // RV64
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KHMTT(unsigned int a, unsigned int b)
{
    /* Holds Rd, returned to the caller as a signed long. */
    long rd;

    /* Operand mapping: %0 = Rd (output), %1 = Rs1 (a), %2 = Rs2 (b). */
    __ASM volatile("khmtt %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.37.3. KHMTT ===== */

/* ===== Inline Function Start for 3.38.1. KMABB ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
 * \brief KMABB (SIMD Saturating Signed Multiply Bottom Halfs & Add)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMABB Rd, Rs1, Rs2
 * KMABT Rd, Rs1, Rs2
 * KMATT Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 16-bit content of 32-bit elements in a register with the 16-bit content
 * of 32-bit elements in another register and add the result to the content of 32-bit elements in the
 * third register. The addition result may be saturated and is written to the third register.
 * * KMABB: rd.W[x] + bottom*bottom (per 32-bit element)
 * * KMABT: rd.W[x] + bottom*top (per 32-bit element)
 * * KMATT: rd.W[x] + top*top (per 32-bit element)
 *
 * **Description**:\n
 * For the `KMABB` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
 * the bottom 16-bit content of 32-bit elements in Rs2.
 * For the `KMABT` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
 * the top 16-bit content of 32-bit elements in Rs2.
 * For the `KMATT` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
 * top 16-bit content of 32-bit elements in Rs2.
 * The multiplication result is added to the content of 32-bit elements in Rd. If the addition result is
 * beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to
 * 1. The results after saturation are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as
 * signed integers.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]); // KMABB
 * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[1]); // KMABT
 * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]); // KMATT
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMABB(long t, unsigned long a, unsigned long b)
{
    /* Rd is both read and written by the instruction, so it is seeded with t
     * and bound as a read-write ("+r") operand. */
    long acc = t;

    /* Operand mapping: %0 = Rd (in/out), %1 = Rs1 (a), %2 = Rs2 (b). */
    __ASM volatile("kmabb %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));

    return acc;
}
/* ===== Inline Function End for 3.38.1. KMABB ===== */

/* ===== Inline Function Start for 3.38.2. KMABT ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
 * \brief KMABT (SIMD Saturating Signed Multiply Bottom & Top Halfs & Add)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMABB Rd, Rs1, Rs2
 * KMABT Rd, Rs1, Rs2
 * KMATT Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 16-bit content of 32-bit elements in a register with the 16-bit content
 * of 32-bit elements in another register and add the result to the content of 32-bit elements in the
 * third register. The addition result may be saturated and is written to the third register.
 * * KMABB: rd.W[x] + bottom*bottom (per 32-bit element)
 * * KMABT: rd.W[x] + bottom*top (per 32-bit element)
 * * KMATT: rd.W[x] + top*top (per 32-bit element)
 *
 * **Description**:\n
 * For the `KMABB` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
 * the bottom 16-bit content of 32-bit elements in Rs2.
 * For the `KMABT` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
 * the top 16-bit content of 32-bit elements in Rs2.
 * For the `KMATT` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
 * top 16-bit content of 32-bit elements in Rs2.
 * The multiplication result is added to the content of 32-bit elements in Rd. If the addition result is
 * beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to
 * 1. The results after saturation are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as
 * signed integers.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]); // KMABB
 * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[1]); // KMABT
 * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]); // KMATT
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMABT(long t, unsigned long a, unsigned long b)
{
    /* Rd is both read and written by the instruction, so it is seeded with t
     * and bound as a read-write ("+r") operand. */
    long acc = t;

    /* Operand mapping: %0 = Rd (in/out), %1 = Rs1 (a), %2 = Rs2 (b). */
    __ASM volatile("kmabt %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));

    return acc;
}
/* ===== Inline Function End for 3.38.2. KMABT ===== */

/* ===== Inline Function Start for 3.38.3. KMATT ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
 * \brief KMATT (SIMD Saturating Signed Multiply Top Halfs & Add)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMABB Rd, Rs1, Rs2
 * KMABT Rd, Rs1, Rs2
 * KMATT Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 16-bit content of 32-bit elements in a register with the 16-bit content
 * of 32-bit elements in another register and add the result to the content of 32-bit elements in the
 * third register. The addition result may be saturated and is written to the third register.
 * * KMABB: rd.W[x] + bottom*bottom (per 32-bit element)
 * * KMABT: rd.W[x] + bottom*top (per 32-bit element)
 * * KMATT: rd.W[x] + top*top (per 32-bit element)
 *
 * **Description**:\n
 * For the `KMABB` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
 * the bottom 16-bit content of 32-bit elements in Rs2.
 * For the `KMABT` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
 * the top 16-bit content of 32-bit elements in Rs2.
 * For the `KMATT` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
 * top 16-bit content of 32-bit elements in Rs2.
 * The multiplication result is added to the content of 32-bit elements in Rd. If the addition result is
 * beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to
 * 1. The results after saturation are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as
 * signed integers.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]); // KMABB
 * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[1]); // KMABT
 * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]); // KMATT
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMATT(long t, unsigned long a, unsigned long b)
{
    /* Rd is both read and written by the instruction, so it is seeded with t
     * and bound as a read-write ("+r") operand. */
    long acc = t;

    /* Operand mapping: %0 = Rd (in/out), %1 = Rs1 (a), %2 = Rs2 (b). */
    __ASM volatile("kmatt %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));

    return acc;
}
/* ===== Inline Function End for 3.38.3. KMATT ===== */

/* ===== Inline Function Start for 3.39.1. KMADA ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
 * \brief KMADA (SIMD Saturating Signed Multiply Two Halfs and Two Adds)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMADA Rd, Rs1, Rs2
 * KMAXDA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then adds
 * the two 32-bit results and 32-bit elements in a third register together. The addition result may be
 * saturated.
 * * KMADA: rd.W[x] + top*top + bottom*bottom (per 32-bit element)
 * * KMAXDA: rd.W[x] + top*bottom + bottom*top (per 32-bit element)
 *
 * **Description**:\n
 * For the `KMADA` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
 * the bottom 16-bit content of 32-bit elements in Rs2 and then adds the result to the result of
 * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit
 * elements in Rs2.
 * For the `KMAXDA` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
 * bottom 16-bit content of 32-bit elements in Rs2 and then adds the result to the result of multiplying
 * the bottom 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit elements in
 * Rs2.
 * The result is added to the content of 32-bit elements in Rd. If the addition result is beyond the Q31
 * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The 32-bit
 * results after saturation are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed
 * integers.
 *
 * **Operations**:\n
 * ~~~
 * // KMADA
 * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]) + (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
 * // KMAXDA
 * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[0]) + (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMADA(long t, unsigned long a, unsigned long b)
{
    /* Rd is both read and written by the instruction, so it is seeded with t
     * and bound as a read-write ("+r") operand. */
    long acc = t;

    /* Operand mapping: %0 = Rd (in/out), %1 = Rs1 (a), %2 = Rs2 (b). */
    __ASM volatile("kmada %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));

    return acc;
}
/* ===== Inline Function End for 3.39.1. KMADA ===== */

/* ===== Inline Function Start for 3.39.2. KMAXDA ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
 * \brief KMAXDA (SIMD Saturating Signed Crossed Multiply Two Halfs and Two Adds)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMADA Rd, Rs1, Rs2
 * KMAXDA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then adds
 * the two 32-bit results and 32-bit elements in a third register together. The addition result may be
 * saturated.
 * * KMADA: rd.W[x] + top*top + bottom*bottom (per 32-bit element)
 * * KMAXDA: rd.W[x] + top*bottom + bottom*top (per 32-bit element)
 *
 * **Description**:\n
 * For the `KMADA` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
 * the bottom 16-bit content of 32-bit elements in Rs2 and then adds the result to the result of
 * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit
 * elements in Rs2.
 * For the `KMAXDA` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
 * bottom 16-bit content of 32-bit elements in Rs2 and then adds the result to the result of multiplying
 * the bottom 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit elements in
 * Rs2.
 * The result is added to the content of 32-bit elements in Rd. If the addition result is beyond the Q31
 * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The 32-bit
 * results after saturation are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed
 * integers.
 *
 * **Operations**:\n
 * ~~~
 * // KMADA
 * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]) + (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
 * // KMAXDA
 * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[0]) + (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMAXDA(long t, unsigned long a, unsigned long b)
{
    /* Rd is both read and written by the instruction, so it is seeded with t
     * and bound as a read-write ("+r") operand. */
    long acc = t;

    /* Operand mapping: %0 = Rd (in/out), %1 = Rs1 (a), %2 = Rs2 (b). */
    __ASM volatile("kmaxda %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));

    return acc;
}
/* ===== Inline Function End for 3.39.2. KMAXDA ===== */

/* ===== Inline Function Start for 3.40.1. KMADS ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
 * \brief KMADS (SIMD Saturating Signed Multiply Two Halfs & Subtract & Add)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMADS Rd, Rs1, Rs2
 * KMADRS Rd, Rs1, Rs2
 * KMAXDS Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then
 * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to
 * the corresponding 32-bit elements in a third register. The addition result may be saturated.
 * * KMADS: rd.W[x] + (top*top - bottom*bottom) (per 32-bit element)
 * * KMADRS: rd.W[x] + (bottom*bottom - top*top) (per 32-bit element)
 * * KMAXDS: rd.W[x] + (top*bottom - bottom*top) (per 32-bit element)
 *
 * **Description**:\n
 * For the `KMADS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
 * the bottom 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
 * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit
 * elements in Rs2.
 * For the `KMADRS` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
 * top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
 * multiplying the bottom 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-
 * bit elements in Rs2.
 * For the `KMAXDS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
 * the top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
 * multiplying the top 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-bit
 * elements in Rs2.
 * The subtraction result is then added to the content of the corresponding 32-bit elements in Rd. If the
 * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and
 * the OV bit is set to 1. The 32-bit results after saturation are written to Rd. The 16-bit contents of Rs1
 * and Rs2 are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * // KMADS
 * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
 * // KMADRS
 * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]);
 * // KMAXDS
 * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMADS(long t, unsigned long a, unsigned long b)
{
    /* Rd is both read and written by the instruction, so it is seeded with t
     * and bound as a read-write ("+r") operand. */
    long acc = t;

    /* Operand mapping: %0 = Rd (in/out), %1 = Rs1 (a), %2 = Rs2 (b). */
    __ASM volatile("kmads %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));

    return acc;
}
/* ===== Inline Function End for 3.40.1. KMADS ===== */

/* ===== Inline Function Start for 3.40.2. KMADRS ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
 * \brief KMADRS (SIMD Saturating Signed Multiply Two Halfs & Reverse Subtract & Add)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMADS Rd, Rs1, Rs2
 * KMADRS Rd, Rs1, Rs2
 * KMAXDS Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then
 * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to
 * the corresponding 32-bit elements in a third register. The addition result may be saturated.
 * * KMADS: rd.W[x] + (top*top - bottom*bottom) (per 32-bit element)
 * * KMADRS: rd.W[x] + (bottom*bottom - top*top) (per 32-bit element)
 * * KMAXDS: rd.W[x] + (top*bottom - bottom*top) (per 32-bit element)
 *
 * **Description**:\n
 * For the `KMADS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
 * the bottom 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
 * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit
 * elements in Rs2.
 * For the `KMADRS` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
 * top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
 * multiplying the bottom 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-
 * bit elements in Rs2.
 * For the `KMAXDS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
 * the top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
 * multiplying the top 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-bit
 * elements in Rs2.
 * The subtraction result is then added to the content of the corresponding 32-bit elements in Rd. If the
 * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and
 * the OV bit is set to 1. The 32-bit results after saturation are written to Rd. The 16-bit contents of Rs1
 * and Rs2 are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * // KMADS
 * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
 * // KMADRS
 * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]);
 * // KMAXDS
 * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMADRS(long t, unsigned long a, unsigned long b)
{
    /* Rd is both read and written by the instruction, so it is seeded with t
     * and bound as a read-write ("+r") operand. */
    long acc = t;

    /* Operand mapping: %0 = Rd (in/out), %1 = Rs1 (a), %2 = Rs2 (b). */
    __ASM volatile("kmadrs %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));

    return acc;
}
/* ===== Inline Function End for 3.40.2. KMADRS ===== */

/* ===== Inline Function Start for 3.40.3. KMAXDS ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
 * \brief KMAXDS (SIMD Saturating Signed Crossed Multiply Two Halfs & Subtract & Add)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMADS Rd, Rs1, Rs2
 * KMADRS Rd, Rs1, Rs2
 * KMAXDS Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then
 * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to
 * the corresponding 32-bit elements in a third register. The addition result may be saturated.
 * * KMADS: rd.W[x] + (top*top - bottom*bottom) (per 32-bit element)
 * * KMADRS: rd.W[x] + (bottom*bottom - top*top) (per 32-bit element)
 * * KMAXDS: rd.W[x] + (top*bottom - bottom*top) (per 32-bit element)
 *
 * **Description**:\n
 * For the `KMADS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
 * the bottom 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
 * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit
 * elements in Rs2.
 * For the `KMADRS` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
 * top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
 * multiplying the bottom 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-
 * bit elements in Rs2.
 * For the `KMAXDS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
 * the top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
 * multiplying the top 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-bit
 * elements in Rs2.
 * The subtraction result is then added to the content of the corresponding 32-bit elements in Rd. If the
 * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and
 * the OV bit is set to 1. The 32-bit results after saturation are written to Rd. The 16-bit contents of Rs1
 * and Rs2 are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * // KMADS
 * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
 * // KMADRS
 * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]);
 * // KMAXDS
 * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMAXDS(long t, unsigned long a, unsigned long b)
{
    /* Accumulator: read by the instruction and overwritten with its result ("+r"). */
    long acc = t;

    /* volatile: the OV side effect is not expressed through the operand list. */
    __ASM volatile(
        "kmaxds %0, %1, %2"
        : "+r"(acc)
        : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.40.3. KMAXDS ===== */

/* ===== Inline Function Start for 3.41. KMAR64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
 * \brief KMAR64 (Signed Multiply and Saturating Add to 64-Bit Data)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * KMAR64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the 32-bit signed elements in two registers and add the 64-bit multiplication
 * results to the 64-bit signed data of a pair of registers (RV32) or a register (RV64). The result is
 * saturated to the Q63 range and written back to the pair of registers (RV32) or the register (RV64).
 *
 * **RV32 Description**:\n
 * This instruction multiplies the 32-bit signed data of Rs1 with that of Rs2. It adds
 * the 64-bit multiplication result to the 64-bit signed data of an even/odd pair of registers specified by
 * Rd(4,1) with unlimited precision. If the 64-bit addition result is beyond the Q63 number range (-2^63 <=
 * Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The saturated result is written back
 * to the even/odd pair of registers specified by Rd(4,1).
 * Rd(4,1), i.e., value d, determines the even/odd pair group of two registers. Specifically, the register
 * pair includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
 * of the pair contains the low 32-bit of the result.
 *
 * **RV64 Description**:\n
 * This instruction multiplies the 32-bit signed elements of Rs1 with that of Rs2. It
 * adds the 64-bit multiplication results to the 64-bit signed data of Rd with unlimited precision. If the
 * 64-bit addition result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range
 * and the OV bit is set to 1. The saturated result is written back to Rd.
 *
 * **Operations**:\n
 * ~~~
 * RV32:
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * result = R[t_H].R[t_L] + (Rs1 * Rs2);
 * if (result > (2^63)-1) {
 *   result = (2^63)-1; OV = 1;
 * } else if (result < -2^63) {
 *   result = -2^63; OV = 1;
 * }
 * R[t_H].R[t_L] = result;
 * RV64:
 * // `result` has unlimited precision
 * result = Rd + (Rs1.W[0] * Rs2.W[0]) + (Rs1.W[1] * Rs2.W[1]);
 * if (result > (2^63)-1) {
 *   result = (2^63)-1; OV = 1;
 * } else if (result < -2^63) {
 *   result = -2^63; OV = 1;
 * }
 * Rd = result;
 * ~~~
 *
 * \param [in]  t    long long type of value stored in t
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_KMAR64(long long t, long a, long b)
{
    /* 64-bit accumulator, read and updated in place by the instruction ("+r"). */
    long long acc = t;

    /* volatile: the OV side effect is not expressed through the operand list. */
    __ASM volatile(
        "kmar64 %0, %1, %2"
        : "+r"(acc)
        : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.41. KMAR64 ===== */

/* ===== Inline Function Start for 3.42.1. KMDA ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
 * \brief KMDA (SIMD Signed Multiply Two Halfs and Add)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMDA Rd, Rs1, Rs2
 * KMXDA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
 * add the two 32-bit results together. The addition result may be saturated.
 * * KMDA: top*top + bottom*bottom (per 32-bit element)
 * * KMXDA: top*bottom + bottom*top (per 32-bit element)
 *
 * **Description**:\n
 * For the `KMDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
 * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-
 * bit elements of Rs2.
 * For the `KMXDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the top 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
 * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of the
 * 32-bit elements of Rs2.
 * The addition result is checked for saturation. If saturation happens, the result is saturated to 2^31-1.
 * The final results are written to Rd. The 16-bit contents are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * if ((Rs1.W[x] != 0x80008000) or (Rs2.W[x] != 0x80008000)) {
 *   // KMDA
 *   Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[1]) + (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
 *   // KMXDA
 *   Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) + (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
 * } else {
 *   Rd.W[x] = 0x7fffffff;
 *   OV = 1;
 * }
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMDA(unsigned long a, unsigned long b)
{
    /* Output-only operand ("=r"): the instruction writes it, so no initializer is needed. */
    long sum;

    /* volatile: the OV side effect is not expressed through the operand list. */
    __ASM volatile(
        "kmda %0, %1, %2"
        : "=r"(sum)
        : "r"(a), "r"(b));
    return sum;
}
/* ===== Inline Function End for 3.42.1. KMDA ===== */

/* ===== Inline Function Start for 3.42.2. KMXDA ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
 * \brief KMXDA (SIMD Signed Crossed Multiply Two Halfs and Add)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMDA Rd, Rs1, Rs2
 * KMXDA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
 * add the two 32-bit results together. The addition result may be saturated.
 * * KMDA: top*top + bottom*bottom (per 32-bit element)
 * * KMXDA: top*bottom + bottom*top (per 32-bit element)
 *
 * **Description**:\n
 * For the `KMDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
 * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-
 * bit elements of Rs2.
 * For the `KMXDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the top 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
 * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of the
 * 32-bit elements of Rs2.
 * The addition result is checked for saturation. If saturation happens, the result is saturated to 2^31-1.
 * The final results are written to Rd. The 16-bit contents are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * if ((Rs1.W[x] != 0x80008000) or (Rs2.W[x] != 0x80008000)) {
 *   // KMDA
 *   Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[1]) + (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
 *   // KMXDA
 *   Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) + (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
 * } else {
 *   Rd.W[x] = 0x7fffffff;
 *   OV = 1;
 * }
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMXDA(unsigned long a, unsigned long b)
{
    /* Output-only operand ("=r"): filled in by the instruction. */
    long sum;

    /* volatile: the OV side effect is not expressed through the operand list. */
    __ASM volatile(
        "kmxda %0, %1, %2"
        : "=r"(sum)
        : "r"(a), "r"(b));
    return sum;
}
/* ===== Inline Function End for 3.42.2. KMXDA ===== */

/* ===== Inline Function Start for 3.43.1. KMMAC ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
 * \brief KMMAC (SIMD Saturating MSW Signed Multiply Word and Add)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMMAC Rd, Rs1, Rs2
 * KMMAC.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit integer elements of two registers and add the most significant
 * 32-bit results with the signed 32-bit integer elements of a third register. The addition results are
 * saturated first and then written back to the third register. The `.u` form performs an additional
 * rounding up operation on the multiplication results before adding the most significant 32-bit part
 * of the results.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2
 * and adds the most significant 32-bit multiplication results with the signed 32-bit elements of Rd. If
 * the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range
 * and the OV bit is set to 1. The results after saturation are written to Rd. The `.u` form of the
 * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by
 * adding a 1 to bit 31 of the results.
 *
 * **Operations**:\n
 * ~~~
 * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
 * if (`.u` form) {
 *   Round[x][32:0] = Mres[x][63:31] + 1;
 *   res[x] = Rd.W[x] + Round[x][32:1];
 * } else {
 *   res[x] = Rd.W[x] + Mres[x][63:32];
 * }
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMMAC(long t, long a, long b)
{
    /* Accumulator: "+r" marks it as both source and destination of the instruction. */
    long acc = t;

    /* volatile: the OV side effect is not expressed through the operand list. */
    __ASM volatile(
        "kmmac %0, %1, %2"
        : "+r"(acc)
        : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.43.1. KMMAC ===== */

/* ===== Inline Function Start for 3.43.2. KMMAC.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
 * \brief KMMAC.u (SIMD Saturating MSW Signed Multiply Word and Add with Rounding)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMMAC Rd, Rs1, Rs2
 * KMMAC.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit integer elements of two registers and add the most significant
 * 32-bit results with the signed 32-bit integer elements of a third register. The addition results are
 * saturated first and then written back to the third register. The `.u` form performs an additional
 * rounding up operation on the multiplication results before adding the most significant 32-bit part
 * of the results.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2
 * and adds the most significant 32-bit multiplication results with the signed 32-bit elements of Rd. If
 * the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range
 * and the OV bit is set to 1. The results after saturation are written to Rd. The `.u` form of the
 * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by
 * adding a 1 to bit 31 of the results.
 *
 * **Operations**:\n
 * ~~~
 * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
 * if (`.u` form) {
 *   Round[x][32:0] = Mres[x][63:31] + 1;
 *   res[x] = Rd.W[x] + Round[x][32:1];
 * } else {
 *   res[x] = Rd.W[x] + Mres[x][63:32];
 * }
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMMAC_U(long t, long a, long b)
{
    /* Accumulator for the rounding (`.u`) form; read and written through "+r". */
    long acc = t;

    /* volatile: the OV side effect is not expressed through the operand list. */
    __ASM volatile(
        "kmmac.u %0, %1, %2"
        : "+r"(acc)
        : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.43.2. KMMAC.u ===== */

/* ===== Inline Function Start for 3.44.1. KMMAWB ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
 * \brief KMMAWB (SIMD Saturating MSW Signed Multiply Word and Bottom Half and Add)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMMAWB Rd, Rs1, Rs2
 * KMMAWB.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the
 * corresponding 32-bit elements of another register and add the most significant 32-bit results with
 * the corresponding signed 32-bit elements of a third register. The addition result is written to the
 * corresponding 32-bit elements of the third register. The `.u` form rounds up the multiplication
 * results from the most significant discarded bit before the addition operations.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit elements of Rs1 with the signed bottom 16-bit content
 * of the corresponding 32-bit elements of Rs2 and adds the most significant 32-bit multiplication
 * results with the corresponding signed 32-bit elements of Rd. If the addition result is beyond the Q31
 * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The results
 * after saturation are written to the corresponding 32-bit elements of Rd. The `.u` form of the
 * instruction rounds up the most significant 32-bit of the 48-bit multiplication results by adding a 1 to
 * bit 15 of the result before the addition operations.
 *
 * **Operations**:\n
 * ~~~
 * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[0];
 * if (`.u` form) {
 *   Round[x][32:0] = Mres[x][47:15] + 1;
 *   res[x] = Rd.W[x] + Round[x][32:1];
 * } else {
 *   res[x] = Rd.W[x] + Mres[x][47:16];
 * }
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMMAWB(long t, unsigned long a, unsigned long b)
{
    /* Accumulator: "+r" marks it as both source and destination of the instruction. */
    long acc = t;

    /* volatile: the OV side effect is not expressed through the operand list. */
    __ASM volatile(
        "kmmawb %0, %1, %2"
        : "+r"(acc)
        : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.44.1. KMMAWB ===== */

/* ===== Inline Function Start for 3.44.2. KMMAWB.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
 * \brief KMMAWB.u (SIMD Saturating MSW Signed Multiply Word and Bottom Half and Add with Rounding)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMMAWB Rd, Rs1, Rs2
 * KMMAWB.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the
 * corresponding 32-bit elements of another register and add the most significant 32-bit results with
 * the corresponding signed 32-bit elements of a third register. The addition result is written to the
 * corresponding 32-bit elements of the third register. The `.u` form rounds up the multiplication
 * results from the most significant discarded bit before the addition operations.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit elements of Rs1 with the signed bottom 16-bit content
 * of the corresponding 32-bit elements of Rs2 and adds the most significant 32-bit multiplication
 * results with the corresponding signed 32-bit elements of Rd. If the addition result is beyond the Q31
 * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The results
 * after saturation are written to the corresponding 32-bit elements of Rd. The `.u` form of the
 * instruction rounds up the most significant 32-bit of the 48-bit multiplication results by adding a 1 to
 * bit 15 of the result before the addition operations.
 *
 * **Operations**:\n
 * ~~~
 * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[0];
 * if (`.u` form) {
 *   Round[x][32:0] = Mres[x][47:15] + 1;
 *   res[x] = Rd.W[x] + Round[x][32:1];
 * } else {
 *   res[x] = Rd.W[x] + Mres[x][47:16];
 * }
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMMAWB_U(long t, unsigned long a, unsigned long b)
{
    /* Accumulator for the rounding (`.u`) form; read and written through "+r". */
    long acc = t;

    /* volatile: the OV side effect is not expressed through the operand list. */
    __ASM volatile(
        "kmmawb.u %0, %1, %2"
        : "+r"(acc)
        : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.44.2. KMMAWB.u ===== */

/* ===== Inline Function Start for 3.45.1. KMMAWB2 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
 * \brief KMMAWB2 (SIMD Saturating MSW Signed Multiply Word and Bottom Half & 2 and Add)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMMAWB2 Rd, Rs1, Rs2
 * KMMAWB2.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit elements of one register and the bottom 16-bit of the
 * corresponding 32-bit elements of another register, double the multiplication results and add the
 * saturated most significant 32-bit results with the corresponding signed 32-bit elements of a third
 * register. The saturated addition result is written to the corresponding 32-bit elements of the third
 * register. The `.u` form rounds up the multiplication results from the most significant discarded bit
 * before the addition operations.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed bottom 16-bit Q15
 * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
 * adds the saturated most significant 32-bit Q31 multiplication results with the corresponding signed
 * 32-bit elements of Rd. If the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is
 * saturated to the range and the OV bit is set to 1. The results after saturation are written to the
 * corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the most significant
 * 32-bit of the 48-bit Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of
 * the result before the addition operations.
 *
 * **Operations**:\n
 * ~~~
 * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[0] == 0x8000)) {
 *   addop.W[x] = 0x7fffffff;
 *   OV = 1;
 * } else {
 *   Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[0];
 *   if (`.u` form) {
 *     Mres[x][47:14] = Mres[x][47:14] + 1;
 *   }
 *   addop.W[x] = Mres[x][46:15]; // doubling
 * }
 * res[x] = Rd.W[x] + addop.W[x];
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMMAWB2(long t, unsigned long a, unsigned long b)
{
    /* Accumulator: "+r" marks it as both source and destination of the instruction. */
    long acc = t;

    /* volatile: the OV side effect is not expressed through the operand list. */
    __ASM volatile(
        "kmmawb2 %0, %1, %2"
        : "+r"(acc)
        : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.45.1. KMMAWB2 ===== */

/* ===== Inline Function Start for 3.45.2. KMMAWB2.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
 * \brief KMMAWB2.u (SIMD Saturating MSW Signed Multiply Word and Bottom Half & 2 and Add with Rounding)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMMAWB2 Rd, Rs1, Rs2
 * KMMAWB2.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit elements of one register and the bottom 16-bit of the
 * corresponding 32-bit elements of another register, double the multiplication results and add the
 * saturated most significant 32-bit results with the corresponding signed 32-bit elements of a third
 * register. The saturated addition result is written to the corresponding 32-bit elements of the third
 * register. The `.u` form rounds up the multiplication results from the most significant discarded bit
 * before the addition operations.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed bottom 16-bit Q15
 * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
 * adds the saturated most significant 32-bit Q31 multiplication results with the corresponding signed
 * 32-bit elements of Rd. If the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is
 * saturated to the range and the OV bit is set to 1. The results after saturation are written to the
 * corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the most significant
 * 32-bit of the 48-bit Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of
 * the result before the addition operations.
 *
 * **Operations**:\n
 * ~~~
 * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[0] == 0x8000)) {
 *   addop.W[x] = 0x7fffffff;
 *   OV = 1;
 * } else {
 *   Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[0];
 *   if (`.u` form) {
 *     Mres[x][47:14] = Mres[x][47:14] + 1;
 *   }
 *   addop.W[x] = Mres[x][46:15]; // doubling
 * }
 * res[x] = Rd.W[x] + addop.W[x];
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMMAWB2_U(long t, unsigned long a, unsigned long b)
{
    /* Accumulator for the rounding (`.u`) form; read and written through "+r". */
    long acc = t;

    /* volatile: the OV side effect is not expressed through the operand list. */
    __ASM volatile(
        "kmmawb2.u %0, %1, %2"
        : "+r"(acc)
        : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.45.2. KMMAWB2.u ===== */

/* ===== Inline Function Start for 3.46.1. KMMAWT ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
 * \brief KMMAWT (SIMD Saturating MSW Signed Multiply Word and Top Half and Add)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMMAWT Rd, Rs1, Rs2
 * KMMAWT.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit integer elements of one register and the signed top 16-bit of the
 * corresponding 32-bit elements of another register and add the most significant 32-bit results with
 * the corresponding signed 32-bit elements of a third register. The addition results are written to the
 * corresponding 32-bit elements of the third register. The `.u` form rounds up the multiplication
 * results from the most significant discarded bit before the addition operations.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit elements of Rs1 with the signed top 16-bit of the
 * corresponding 32-bit elements of Rs2 and adds the most significant 32-bit multiplication results
 * with the corresponding signed 32-bit elements of Rd. If the addition result is beyond the Q31
 * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The results
 * after saturation are written to the corresponding 32-bit elements of Rd. The `.u` form of the
 * instruction rounds up the most significant 32-bit of the 48-bit multiplication results by adding a 1 to
 * bit 15 of the result before the addition operations.
 *
 * **Operations**:\n
 * ~~~
 * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[1];
 * if (`.u` form) {
 *   Round[x][32:0] = Mres[x][47:15] + 1;
 *   res[x] = Rd.W[x] + Round[x][32:1];
 * } else {
 *   res[x] = Rd.W[x] + Mres[x][47:16];
 * }
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMMAWT(long t, unsigned long a, unsigned long b)
{
    /* Accumulator: "+r" marks it as both source and destination of the instruction. */
    long acc = t;

    /* volatile: the OV side effect is not expressed through the operand list. */
    __ASM volatile(
        "kmmawt %0, %1, %2"
        : "+r"(acc)
        : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.46.1. KMMAWT ===== */

/* ===== Inline Function Start for 3.46.2. KMMAWT.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
 * \brief KMMAWT.u (SIMD Saturating MSW Signed Multiply Word and Top Half and Add with Rounding)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMMAWT Rd, Rs1, Rs2
 * KMMAWT.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit integer elements of one register and the signed top 16-bit of the
 * corresponding 32-bit elements of another register and add the most significant 32-bit results with
 * the corresponding signed 32-bit elements of a third register. The addition results are written to the
 * corresponding 32-bit elements of the third register. The `.u` form rounds up the multiplication
 * results from the most significant discarded bit before the addition operations.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit elements of Rs1 with the signed top 16-bit of the
 * corresponding 32-bit elements of Rs2 and adds the most significant 32-bit multiplication results
 * with the corresponding signed 32-bit elements of Rd. If the addition result is beyond the Q31
 * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The results
 * after saturation are written to the corresponding 32-bit elements of Rd. The `.u` form of the
 * instruction rounds up the most significant 32-bit of the 48-bit multiplication results by adding a 1 to
 * bit 15 of the result before the addition operations.
 *
 * **Operations**:\n
 * ~~~
 * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[1];
 * if (`.u` form) {
 *   Round[x][32:0] = Mres[x][47:15] + 1;
 *   res[x] = Rd.W[x] + Round[x][32:1];
 * } else {
 *   res[x] = Rd.W[x] + Mres[x][47:16];
 * }
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMMAWT_U(long t, unsigned long a, unsigned long b)
{
    /* Accumulator for the rounding (`.u`) form; read and written through "+r". */
    long acc = t;

    /* volatile: the OV side effect is not expressed through the operand list. */
    __ASM volatile(
        "kmmawt.u %0, %1, %2"
        : "+r"(acc)
        : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.46.2. KMMAWT.u ===== */

/* ===== Inline Function Start for 3.47.1. KMMAWT2 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
 * \brief KMMAWT2 (SIMD Saturating MSW Signed Multiply Word and Top Half & 2 and Add)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMMAWT2 Rd, Rs1, Rs2
 * KMMAWT2.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit elements of one register and the top 16-bit of the
 * corresponding 32-bit elements of another register, double the multiplication results and add the
 * saturated most significant 32-bit results with the corresponding signed 32-bit elements of a third
 * register. The saturated addition result is written to the corresponding 32-bit elements of the third
 * register. The `.u` form rounds up the multiplication results from the most significant discarded bit
 * before the addition operations.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed top 16-bit Q15
 * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
 * adds the saturated most significant 32-bit Q31 multiplication results with the corresponding signed
 * 32-bit elements of Rd. If the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is
 * saturated to the range and the OV bit is set to 1. The results after saturation are written to the
 * corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the most significant
 * 32-bit of the 48-bit Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of
 * the result before the addition operations.
 *
 * **Operations**:\n
 * ~~~
 * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[1] == 0x8000)) {
 *   addop.W[x] = 0x7fffffff;
 *   OV = 1;
 * } else {
 *   Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[1];
 *   if (`.u` form) {
 *     Mres[x][47:14] = Mres[x][47:14] + 1;
 *   }
 *   addop.W[x] = Mres[x][46:15]; // doubling
 * }
 * res[x] = Rd.W[x] + addop.W[x];
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMMAWT2(long t, unsigned long a, unsigned long b)
{
    /* Accumulator: "+r" marks it as both source and destination of the instruction. */
    long acc = t;

    /* volatile: the OV side effect is not expressed through the operand list. */
    __ASM volatile(
        "kmmawt2 %0, %1, %2"
        : "+r"(acc)
        : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.47.1. KMMAWT2 ===== */

/* ===== Inline Function Start for 3.47.2. KMMAWT2.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
 * \brief KMMAWT2.u (SIMD Saturating MSW Signed Multiply Word and Top Half & 2 and Add with Rounding)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMMAWT2 Rd, Rs1, Rs2
 * KMMAWT2.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit elements of one register and the top 16-bit of the
 * corresponding 32-bit elements of another register, double the multiplication results and add the
 * saturated most significant 32-bit results with the corresponding signed 32-bit elements of a third
 * register. The saturated addition result is written to the corresponding 32-bit elements of the third
 * register. The `.u` form rounds up the multiplication results from the most significant discarded bit
 * before the addition operations.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed top 16-bit Q15
 * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
 * adds the saturated most significant 32-bit Q31 multiplication results with the corresponding signed
 * 32-bit elements of Rd. If the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is
 * saturated to the range and the OV bit is set to 1. The results after saturation are written to the
 * corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the most significant
 * 32-bit of the 48-bit Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of
 * the result before the addition operations.
 *
 * **Operations**:\n
 * ~~~
 * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[1] == 0x8000)) {
 *   addop.W[x] = 0x7fffffff;
 *   OV = 1;
 * } else {
 *   Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[1];
 *   if (`.u` form) {
 *     Mres[x][47:14] = Mres[x][47:14] + 1;
 *   }
 *   addop.W[x] = Mres[x][46:15]; // doubling
 * }
 * res[x] = Rd.W[x] + addop.W[x];
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMMAWT2_U(long t, unsigned long a, unsigned long b)
{
    /* Rounding (`.u`) variant. Operand %0 is read-write ("+r"): it supplies
     * the addend taken from `t` and receives the instruction's result. */
    long acc = t;

    /* Kept volatile; presumably because the instruction can also set the OV
     * flag (see the Operations pseudo-code), which the compiler cannot see. */
    __ASM volatile(
        "kmmawt2.u %0, %1, %2"
        : "+r"(acc)
        : "r"(a), "r"(b));

    return acc;
}
/* ===== Inline Function End for 3.47.2. KMMAWT2.u ===== */

/* ===== Inline Function Start for 3.48.1. KMMSB ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
 * \brief KMMSB (SIMD Saturating MSW Signed Multiply Word and Subtract)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMMSB Rd, Rs1, Rs2
 * KMMSB.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit integer elements of two registers and subtract the most
 * significant 32-bit results from the signed 32-bit elements of a third register. The subtraction results
 * are written to the third register. The `.u` form performs an additional rounding up operation on
 * the multiplication results before subtracting the most significant 32-bit part of the results.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2
 * and subtracts the most significant 32-bit multiplication results from the signed 32-bit elements of
 * Rd. If the subtraction result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the
 * range and the OV bit is set to 1. The results after saturation are written to Rd. The `.u` form of the
 * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by
 * adding a 1 to bit 31 of the results.
 *
 * **Operations**:\n
 * ~~~
 * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
 * if (`.u` form) {
 *   Round[x][32:0] = Mres[x][63:31] + 1;
 *   res[x] = Rd.W[x] - Round[x][32:1];
 * } else {
 *   res[x] = Rd.W[x] - Mres[x][63:32];
 * }
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMMSB(long t, long a, long b)
{
    /* Operand %0 is read-write ("+r"): it supplies the minuend taken from `t`
     * and receives the instruction's result. */
    long acc = t;

    /* Kept volatile; presumably because the instruction can also set the OV
     * flag (see the Operations pseudo-code), which the compiler cannot see. */
    __ASM volatile(
        "kmmsb %0, %1, %2"
        : "+r"(acc)
        : "r"(a), "r"(b));

    return acc;
}
/* ===== Inline Function End for 3.48.1. KMMSB ===== */

/* ===== Inline Function Start for 3.48.2. KMMSB.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
 * \brief KMMSB.u (SIMD Saturating MSW Signed Multiply Word and Subtraction with Rounding)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMMSB Rd, Rs1, Rs2
 * KMMSB.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit integer elements of two registers and subtract the most
 * significant 32-bit results from the signed 32-bit elements of a third register. The subtraction results
 * are written to the third register. The `.u` form performs an additional rounding up operation on
 * the multiplication results before subtracting the most significant 32-bit part of the results.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2
 * and subtracts the most significant 32-bit multiplication results from the signed 32-bit elements of
 * Rd. If the subtraction result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the
 * range and the OV bit is set to 1. The results after saturation are written to Rd. The `.u` form of the
 * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by
 * adding a 1 to bit 31 of the results.
 *
 * **Operations**:\n
 * ~~~
 * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
 * if (`.u` form) {
 *   Round[x][32:0] = Mres[x][63:31] + 1;
 *   res[x] = Rd.W[x] - Round[x][32:1];
 * } else {
 *   res[x] = Rd.W[x] - Mres[x][63:32];
 * }
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMMSB_U(long t, long a, long b)
{
    /* Rounding (`.u`) variant. Operand %0 is read-write ("+r"): it supplies
     * the minuend taken from `t` and receives the instruction's result. */
    long acc = t;

    /* Kept volatile; presumably because the instruction can also set the OV
     * flag (see the Operations pseudo-code), which the compiler cannot see. */
    __ASM volatile(
        "kmmsb.u %0, %1, %2"
        : "+r"(acc)
        : "r"(a), "r"(b));

    return acc;
}
/* ===== Inline Function End for 3.48.2. KMMSB.u ===== */

/* ===== Inline Function Start for 3.49.1. KMMWB2 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
 * \brief KMMWB2 (SIMD Saturating MSW Signed Multiply Word and Bottom Half & 2)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMMWB2 Rd, Rs1, Rs2
 * KMMWB2.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the
 * corresponding 32-bit elements of another register, double the multiplication results and write the
 * saturated most significant 32-bit results to the corresponding 32-bit elements of a register. The `.u`
 * form rounds up the results from the most significant discarded bit.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed bottom 16-bit Q15
 * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
 * writes the saturated most significant 32-bit Q31 multiplication results to the corresponding 32-bit
 * elements of Rd. The `.u` form of the instruction rounds up the most significant 32-bit of the 48-bit
 * Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of the results.
 *
 * **Operations**:\n
 * ~~~
 * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[0] == 0x8000)) {
 *   Rd.W[x] = 0x7fffffff;
 *   OV = 1;
 * } else {
 *   Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[0];
 *   if (`.u` form) {
 *     Round[x][32:0] = Mres[x][46:14] + 1;
 *     Rd.W[x] = Round[x][32:1];
 *   } else {
 *     Rd.W[x] = Mres[x][46:15];
 *   }
 * }
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMMWB2(long a, unsigned long b)
{
    /* Write-only destination ("=r"); no prior value is consumed. */
    long rd;

    /* Kept volatile; presumably because the instruction can also set the OV
     * flag (see the Operations pseudo-code), which the compiler cannot see. */
    __ASM volatile(
        "kmmwb2 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.49.1. KMMWB2 ===== */

/* ===== Inline Function Start for 3.49.2. KMMWB2.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
 * \brief KMMWB2.u (SIMD Saturating MSW Signed Multiply Word and Bottom Half & 2 with Rounding)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMMWB2 Rd, Rs1, Rs2
 * KMMWB2.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the
 * corresponding 32-bit elements of another register, double the multiplication results and write the
 * saturated most significant 32-bit results to the corresponding 32-bit elements of a register. The `.u`
 * form rounds up the results from the most significant discarded bit.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed bottom 16-bit Q15
 * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
 * writes the saturated most significant 32-bit Q31 multiplication results to the corresponding 32-bit
 * elements of Rd. The `.u` form of the instruction rounds up the most significant 32-bit of the 48-bit
 * Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of the results.
 *
 * **Operations**:\n
 * ~~~
 * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[0] == 0x8000)) {
 *   Rd.W[x] = 0x7fffffff;
 *   OV = 1;
 * } else {
 *   Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[0];
 *   if (`.u` form) {
 *     Round[x][32:0] = Mres[x][46:14] + 1;
 *     Rd.W[x] = Round[x][32:1];
 *   } else {
 *     Rd.W[x] = Mres[x][46:15];
 *   }
 * }
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMMWB2_U(long a, unsigned long b)
{
    /* Rounding (`.u`) variant. Write-only destination ("=r"); no prior value
     * is consumed. */
    long rd;

    /* Kept volatile; presumably because the instruction can also set the OV
     * flag (see the Operations pseudo-code), which the compiler cannot see. */
    __ASM volatile(
        "kmmwb2.u %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.49.2. KMMWB2.u ===== */

/* ===== Inline Function Start for 3.50.1. KMMWT2 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
 * \brief KMMWT2 (SIMD Saturating MSW Signed Multiply Word and Top Half & 2)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMMWT2 Rd, Rs1, Rs2
 * KMMWT2.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit integer elements of one register and the top 16-bit of the
 * corresponding 32-bit elements of another register, double the multiplication results and write the
 * saturated most significant 32-bit results to the corresponding 32-bit elements of a register. The `.u`
 * form rounds up the results from the most significant discarded bit.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed top 16-bit Q15
 * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
 * writes the saturated most significant 32-bit Q31 multiplication results to the corresponding 32-bit
 * elements of Rd. The `.u` form of the instruction rounds up the most significant 32-bit of the 48-bit
 * Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of the results.
 *
 * **Operations**:\n
 * ~~~
 * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[1] == 0x8000)) {
 *   Rd.W[x] = 0x7fffffff;
 *   OV = 1;
 * } else {
 *   Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[1];
 *   if (`.u` form) {
 *     Round[x][32:0] = Mres[x][46:14] + 1;
 *     Rd.W[x] = Round[x][32:1];
 *   } else {
 *     Rd.W[x] = Mres[x][46:15];
 *   }
 * }
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMMWT2(long a, unsigned long b)
{
    /* Write-only destination ("=r"); no prior value is consumed. */
    long rd;

    /* Kept volatile; presumably because the instruction can also set the OV
     * flag (see the Operations pseudo-code), which the compiler cannot see. */
    __ASM volatile(
        "kmmwt2 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.50.1. KMMWT2 ===== */

/* ===== Inline Function Start for 3.50.2. KMMWT2.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
 * \brief KMMWT2.u (SIMD Saturating MSW Signed Multiply Word and Top Half & 2 with Rounding)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMMWT2 Rd, Rs1, Rs2
 * KMMWT2.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit integer elements of one register and the top 16-bit of the
 * corresponding 32-bit elements of another register, double the multiplication results and write the
 * saturated most significant 32-bit results to the corresponding 32-bit elements of a register. The `.u`
 * form rounds up the results from the most significant discarded bit.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed top 16-bit Q15
 * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
 * writes the saturated most significant 32-bit Q31 multiplication results to the corresponding 32-bit
 * elements of Rd. The `.u` form of the instruction rounds up the most significant 32-bit of the 48-bit
 * Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of the results.
 *
 * **Operations**:\n
 * ~~~
 * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[1] == 0x8000)) {
 *   Rd.W[x] = 0x7fffffff;
 *   OV = 1;
 * } else {
 *   Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[1];
 *   if (`.u` form) {
 *     Round[x][32:0] = Mres[x][46:14] + 1;
 *     Rd.W[x] = Round[x][32:1];
 *   } else {
 *     Rd.W[x] = Mres[x][46:15];
 *   }
 * }
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMMWT2_U(long a, unsigned long b)
{
    /* Rounding (`.u`) variant. Write-only destination ("=r"); no prior value
     * is consumed. */
    long rd;

    /* Kept volatile; presumably because the instruction can also set the OV
     * flag (see the Operations pseudo-code), which the compiler cannot see. */
    __ASM volatile(
        "kmmwt2.u %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.50.2. KMMWT2.u ===== */

/* ===== Inline Function Start for 3.51.1. KMSDA ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
 * \brief KMSDA (SIMD Saturating Signed Multiply Two Halfs & Add & Subtract)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMSDA Rd, Rs1, Rs2
 * KMSXDA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from the 32-bit elements of two registers, and then
 * subtract the two 32-bit results from the corresponding 32-bit elements of a third register. The
 * subtraction result may be saturated.
 * * KMSDA: rd.W[x] - top*top - bottom*bottom (per 32-bit element)
 * * KMSXDA: rd.W[x] - top*bottom - bottom*top (per 32-bit element)
 *
 * **Description**:\n
 * For the `KMSDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of
 * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2.
 * For the `KMSXDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the top 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of the
 * 32-bit elements of Rs1 with the bottom 16-bit content of the 32-bit elements of Rs2.
 * The two 32-bit multiplication results are then subtracted from the content of the corresponding 32-
 * bit elements of Rd. If the subtraction result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is
 * saturated to the range and the OV bit is set to 1. The results after saturation are written to Rd. The
 * 16-bit contents are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * // KMSDA
 * res[x] = Rd.W[x] - (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
 * // KMSXDA
 * res[x] = Rd.W[x] - (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMSDA(long t, unsigned long a, unsigned long b)
{
    /* Operand %0 is read-write ("+r"): it supplies the value taken from `t`
     * that the products are subtracted from, and receives the result. */
    long acc = t;

    /* Kept volatile; presumably because the instruction can also set the OV
     * flag (see the Operations pseudo-code), which the compiler cannot see. */
    __ASM volatile(
        "kmsda %0, %1, %2"
        : "+r"(acc)
        : "r"(a), "r"(b));

    return acc;
}
/* ===== Inline Function End for 3.51.1. KMSDA ===== */

/* ===== Inline Function Start for 3.51.2. KMSXDA ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
 * \brief KMSXDA (SIMD Saturating Signed Crossed Multiply Two Halfs & Add & Subtract)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMSDA Rd, Rs1, Rs2
 * KMSXDA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from the 32-bit elements of two registers, and then
 * subtract the two 32-bit results from the corresponding 32-bit elements of a third register. The
 * subtraction result may be saturated.
 * * KMSDA: rd.W[x] - top*top - bottom*bottom (per 32-bit element)
 * * KMSXDA: rd.W[x] - top*bottom - bottom*top (per 32-bit element)
 *
 * **Description**:\n
 * For the `KMSDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of
 * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2.
 * For the `KMSXDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the top 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of the
 * 32-bit elements of Rs1 with the bottom 16-bit content of the 32-bit elements of Rs2.
 * The two 32-bit multiplication results are then subtracted from the content of the corresponding 32-
 * bit elements of Rd. If the subtraction result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is
 * saturated to the range and the OV bit is set to 1. The results after saturation are written to Rd. The
 * 16-bit contents are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * // KMSDA
 * res[x] = Rd.W[x] - (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
 * // KMSXDA
 * res[x] = Rd.W[x] - (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMSXDA(long t, unsigned long a, unsigned long b)
{
    /* Crossed variant of KMSDA. Operand %0 is read-write ("+r"): it supplies
     * the value taken from `t` that the products are subtracted from, and
     * receives the result. */
    long acc = t;

    /* Kept volatile; presumably because the instruction can also set the OV
     * flag (see the Operations pseudo-code), which the compiler cannot see. */
    __ASM volatile(
        "kmsxda %0, %1, %2"
        : "+r"(acc)
        : "r"(a), "r"(b));

    return acc;
}
/* ===== Inline Function End for 3.51.2. KMSXDA ===== */

/* ===== Inline Function Start for 3.52. KMSR64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
 * \brief KMSR64 (Signed Multiply and Saturating Subtract from 64-Bit Data)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * KMSR64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the 32-bit signed elements in two registers and subtract the 64-bit multiplication
 * results from the 64-bit signed data of a pair of registers (RV32) or a register (RV64). The result is
 * saturated to the Q63 range and written back to the pair of registers (RV32) or the register (RV64).
 *
 * **RV32 Description**:\n
 * This instruction multiplies the 32-bit signed data of Rs1 with that of Rs2. It
 * subtracts the 64-bit multiplication result from the 64-bit signed data of an even/odd pair of registers
 * specified by Rd(4,1) with unlimited precision. If the 64-bit subtraction result is beyond the Q63
 * number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The saturated
 * result is written back to the even/odd pair of registers specified by Rd(4,1).
 * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
 * of the pair contains the low 32-bit of the result.
 *
 * **RV64 Description**:\n
 * This instruction multiplies the 32-bit signed elements of Rs1 with that of Rs2. It
 * subtracts the 64-bit multiplication results from the 64-bit signed data in Rd with unlimited
 * precision. If the 64-bit subtraction result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is
 * saturated to the range and the OV bit is set to 1. The saturated result is written back to Rd.
 *
 * **Operations**:\n
 * ~~~
 * RV32:
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * result = R[t_H].R[t_L] - (Rs1 * Rs2);
 * if (result > (2^63)-1) {
 *   result = (2^63)-1; OV = 1;
 * } else if (result < -2^63) {
 *   result = -2^63; OV = 1;
 * }
 * R[t_H].R[t_L] = result;
 * RV64:
 * // `result` has unlimited precision
 * result = Rd - (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]);
 * if (result > (2^63)-1) {
 *   result = (2^63)-1; OV = 1;
 * } else if (result < -2^63) {
 *   result = -2^63; OV = 1;
 * }
 * Rd = result;
 * ~~~
 *
 * \param [in]  t    long long type of value stored in t
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_KMSR64(long long t, long a, long b)
{
    /* 64-bit read-write operand ("+r"): it supplies the value taken from `t`
     * that the product is subtracted from, and receives the result. */
    long long acc64 = t;

    /* Kept volatile; presumably because the instruction can also set the OV
     * flag (see the Operations pseudo-code), which the compiler cannot see. */
    __ASM volatile(
        "kmsr64 %0, %1, %2"
        : "+r"(acc64)
        : "r"(a), "r"(b));

    return acc64;
}
/* ===== Inline Function End for 3.52. KMSR64 ===== */

/* ===== Inline Function Start for 3.53. KSLLW ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
 * \brief KSLLW (Saturating Shift Left Logical for Word)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * KSLLW Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do logical left shift operation with saturation on a 32-bit word. The shift amount is a
 * variable from a GPR.
 *
 * **Description**:\n
 * The first word data in Rs1 is left-shifted logically. The vacated low-order bits are filled with
 * zero and the shift amount is specified by the low-order 5 bits of the value in the Rs2 register. Any
 * shifted value greater than 2^31-1 is saturated to 2^31-1. Any shifted value smaller than -2^31 is saturated
 * to -2^31. And the saturated result is sign-extended and written to Rd. If any saturation is performed,
 * set OV bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * sa = Rs2[4:0];
 * res[(31+sa):0] = Rs1.W[0] << sa;
 * if (res > (2^31)-1) {
 *   res = 0x7fffffff; OV = 1;
 * } else if (res < -2^31) {
 *   res = 0x80000000; OV = 1;
 * }
 * Rd[31:0] = res[31:0]; // RV32
 * Rd[63:0] = SE(res[31:0]); // RV64
 * ~~~
 *
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KSLLW(long a, unsigned int b)
{
    /* Write-only destination ("=r"); `a` is the value to shift and `b` is
     * passed in a register as the shift amount. */
    long shifted;

    /* Kept volatile; presumably because the instruction can also set the OV
     * flag (see the Operations pseudo-code), which the compiler cannot see. */
    __ASM volatile(
        "ksllw %0, %1, %2"
        : "=r"(shifted)
        : "r"(a), "r"(b));

    return shifted;
}
/* ===== Inline Function End for 3.53. KSLLW ===== */

/* ===== Inline Function Start for 3.54. KSLLIW ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
 * \brief KSLLIW (Saturating Shift Left Logical Immediate for Word)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * KSLLIW Rd, Rs1, imm5u
 * ~~~
 *
 * **Purpose**:\n
 * Do logical left shift operation with saturation on a 32-bit word. The shift amount is an
 * immediate value.
 *
 * **Description**:\n
 * The first word data in Rs1 is left-shifted logically. The vacated low-order bits are filled with
 * zero and the shift amount is specified by the imm5u constant. Any shifted value greater than 2^31-1 is
 * saturated to 2^31-1. Any shifted value smaller than -2^31 is saturated to -2^31. And the saturated result is
 * sign-extended and written to Rd. If any saturation is performed, set OV bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * sa = imm5u;
 * res[(31+sa):0] = Rs1.W[0] << sa;
 * if (res > (2^31)-1) {
 *   res = 0x7fffffff; OV = 1;
 * } else if (res < -2^31) {
 *   res = 0x80000000; OV = 1;
 * }
 * Rd[31:0] = res[31:0]; // RV32
 * Rd[63:0] = SE(res[31:0]); // RV64
 * ~~~
 *
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in long type
 */
#define __RV_KSLLIW(a, b)    \
    ({    \
        /* Evaluate `a` BEFORE the output temporary is declared, and keep   \
         * both temporaries in the reserved `__` namespace. Otherwise a     \
         * call such as `result = __RV_KSLLIW(result, 2)` would read the    \
         * macro's own uninitialized temporary instead of the caller's      \
         * variable. */    \
        long __a = (long)(a);    \
        long __res;    \
        /* `b` is bound with the "K" constraint, so it must be an integer   \
         * constant expression usable as an immediate operand. */    \
        __ASM volatile("kslliw %0, %1, %2" : "=r"(__res) : "r"(__a), "K"(b));    \
        __res;    \
    })
/* ===== Inline Function End for 3.54. KSLLIW ===== */

/* ===== Inline Function Start for 3.55. KSLL8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
 * \brief KSLL8 (SIMD 8-bit Saturating Shift Left Logical)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KSLL8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit elements logical left shift operations with saturation simultaneously. The shift
 * amount is a variable from a GPR.
 *
 * **Description**:\n
 * The 8-bit data elements in Rs1 are left-shifted logically. The vacated low-order bits are filled
 * with zero and the shift amount is specified by the low-order 3 bits of the value in the Rs2 register.
 * Any shifted value greater than 2^7-1 is saturated to 2^7-1. Any shifted value smaller than -2^7 is
 * saturated to -2^7. And the saturated results are written to Rd. If any saturation is performed, set OV
 * bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * sa = Rs2[2:0];
 * if (sa != 0) {
 *   res[(7+sa):0] = Rs1.B[x] << sa;
 *   if (res > (2^7)-1) {
 *     res = 0x7f; OV = 1;
 *   } else if (res < -2^7) {
 *     res = 0x80; OV = 1;
 *   }
 *   Rd.B[x] = res[7:0];
 * } else {
 *   Rd = Rs1;
 * }
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KSLL8(unsigned long a, unsigned int b)
{
    /* Write-only destination ("=r"); `a` holds the packed 8-bit elements and
     * `b` is passed in a register as the shift amount. */
    unsigned long packed;

    /* Kept volatile; presumably because the instruction can also set the OV
     * flag (see the Operations pseudo-code), which the compiler cannot see. */
    __ASM volatile(
        "ksll8 %0, %1, %2"
        : "=r"(packed)
        : "r"(a), "r"(b));

    return packed;
}
/* ===== Inline Function End for 3.55. KSLL8 ===== */

/* ===== Inline Function Start for 3.56. KSLLI8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
 * \brief KSLLI8 (SIMD 8-bit Saturating Shift Left Logical Immediate)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KSLLI8 Rd, Rs1, imm3u
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit elements logical left shift operations with saturation simultaneously. The shift
 * amount is an immediate value.
 *
 * **Description**:\n
 * The 8-bit data elements in Rs1 are left-shifted logically. The vacated low-order bits are filled
 * with zero and the shift amount is specified by the imm3u constant. Any shifted value greater than
 * 2^7-1 is saturated to 2^7-1. Any shifted value smaller than -2^7 is saturated to -2^7. And the saturated
 * results are written to Rd. If any saturation is performed, set OV bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * sa = imm3u[2:0];
 * if (sa != 0) {
 *   res[(7+sa):0] = Rs1.B[x] << sa;
 *   if (res > (2^7)-1) {
 *     res = 0x7f; OV = 1;
 *   } else if (res < -2^7) {
 *     res = 0x80; OV = 1;
 *   }
 *   Rd.B[x] = res[7:0];
 * } else {
 *   Rd = Rs1;
 * }
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
#define __RV_KSLLI8(a, b)    \
    ({    \
        /* Evaluate `a` BEFORE the output temporary is declared, and keep   \
         * both temporaries in the reserved `__` namespace. Otherwise a     \
         * call such as `result = __RV_KSLLI8(result, 2)` would read the    \
         * macro's own uninitialized temporary instead of the caller's      \
         * variable. */    \
        unsigned long __a = (unsigned long)(a);    \
        unsigned long __res;    \
        /* `b` is bound with the "K" constraint, so it must be an integer   \
         * constant expression usable as an immediate operand. */    \
        __ASM volatile("kslli8 %0, %1, %2" : "=r"(__res) : "r"(__a), "K"(b));    \
        __res;    \
    })
/* ===== Inline Function End for 3.56. KSLLI8 ===== */

/* ===== Inline Function Start for 3.57. KSLL16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
 * \brief KSLL16 (SIMD 16-bit Saturating Shift Left Logical)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KSLL16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit elements logical left shift operations with saturation simultaneously. The shift
 * amount is a variable from a GPR.
 *
 * **Description**:\n
 * The 16-bit data elements in Rs1 are left-shifted logically. The vacated low-order bits are filled
 * with zero and the shift amount is specified by the low-order 4 bits of the value in the Rs2 register.
 * Any shifted value greater than 2^15-1 is saturated to 2^15-1. Any shifted value smaller than -2^15 is
 * saturated to -2^15. And the saturated results are written to Rd. If any saturation is performed, set OV
 * bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * sa = Rs2[3:0];
 * if (sa != 0) {
 *   res[(15+sa):0] = Rs1.H[x] << sa;
 *   if (res > (2^15)-1) {
 *     res = 0x7fff; OV = 1;
 *   } else if (res < -2^15) {
 *     res = 0x8000; OV = 1;
 *   }
 *   Rd.H[x] = res[15:0];
 * } else {
 *   Rd = Rs1;
 * }
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KSLL16(unsigned long a, unsigned int b)
{
    /* Write-only destination ("=r"); `a` holds the packed 16-bit elements and
     * `b` is passed in a register as the shift amount. */
    unsigned long packed;

    /* Kept volatile; presumably because the instruction can also set the OV
     * flag (see the Operations pseudo-code), which the compiler cannot see. */
    __ASM volatile(
        "ksll16 %0, %1, %2"
        : "=r"(packed)
        : "r"(a), "r"(b));

    return packed;
}
/* ===== Inline Function End for 3.57. KSLL16 ===== */

/* ===== Inline Function Start for 3.58. KSLLI16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
 * \brief KSLLI16 (SIMD 16-bit Saturating Shift Left Logical Immediate)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KSLLI16 Rd, Rs1, imm4u
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit elements logical left shift operations with saturation simultaneously. The shift
 * amount is an immediate value.
 *
 * **Description**:\n
 * The 16-bit data elements in Rs1 are left-shifted logically. The vacated low-order bits are filled
 * with zero and the shift amount is specified by the imm4u constant. Any shifted value greater than
 * 2^15-1 is saturated to 2^15-1. Any shifted value smaller than -2^15 is saturated to -2^15. And the saturated
 * results are written to Rd. If any saturation is performed, set OV bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * sa = imm4u[3:0];
 * if (sa != 0) {
 *   res[(15+sa):0] = Rs1.H[x] << sa;
 *   if (res > (2^15)-1) {
 *     res = 0x7fff; OV = 1;
 *   } else if (res < -2^15) {
 *     res = 0x8000; OV = 1;
 *   }
 *   Rd.H[x] = res[15:0];
 * } else {
 *   Rd = Rs1;
 * }
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
#define __RV_KSLLI16(a, b)    \
    ({    \
        /* Capture `a` before any other local exists, so a caller argument */    \
        /* that happens to be named `result` is not shadowed by the macro. */    \
        unsigned long __a = (unsigned long)(a);    \
        unsigned long __rd;    \
        /* `b` is emitted as an immediate operand (imm4u in the syntax). */    \
        __ASM volatile("kslli16 %0, %1, %2" : "=r"(__rd) : "r"(__a), "K"(b));    \
        __rd;    \
    })
/* ===== Inline Function End for 3.58. KSLLI16 ===== */

/* ===== Inline Function Start for 3.59.1. KSLRA8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
 * \brief KSLRA8 (SIMD 8-bit Shift Left Logical with Saturation or Shift Right Arithmetic)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KSLRA8 Rd, Rs1, Rs2
 * KSLRA8.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit elements logical left (positive) or arithmetic right (negative) shift operation with
 * Q7 saturation for the left shift. The `.u` form performs additional rounding up operations for the
 * right shift.
 *
 * **Description**:\n
 * The 8-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
 * based on the value of Rs2[3:0]. Rs2[3:0] is in the signed range of [-2^3, 2^3-1]. A positive Rs2[3:0] means
 * logical left shift and a negative Rs2[3:0] means arithmetic right shift. The shift amount is the
 * absolute value of Rs2[3:0]. However, the behavior of `Rs2[3:0]==-2^3 (0x8)` is defined to be
 * equivalent to the behavior of `Rs2[3:0]==-(2^3-1) (0x9)`.
 * The left-shifted results are saturated to the 8-bit signed integer range of [-2^7, 2^7-1]. For the `.u` form
 * of the instruction, the right-shifted results are added a 1 to the most significant discarded bit
 * position for rounding effect. After the shift, saturation, or rounding, the final results are written to
 * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:4] will not affect
 * this instruction.
 *
 * **Operations**:\n
 * ~~~
 * if (Rs2[3:0] < 0) {
 *   sa = -Rs2[3:0];
 *   sa = (sa == 8)? 7 : sa;
 *   if (`.u` form) {
 *     res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1;
 *     Rd.B[x] = res[7:0];
 *   } else {
 *     Rd.B[x] = SE8(Rs1.B[x][7:sa]);
 *   }
 * } else {
 *   sa = Rs2[2:0];
 *   res[(7+sa):0] = Rs1.B[x] <<(logic) sa;
 *   if (res > (2^7)-1) {
 *     res[7:0] = 0x7f; OV = 1;
 *   } else if (res < -2^7) {
 *     res[7:0] = 0x80; OV = 1;
 *   }
 *   Rd.B[x] = res[7:0];
 * }
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KSLRA8(unsigned long a, int b)
{
    unsigned long rd;

    /* a carries the packed 8-bit elements (Rs1); b is the signed shift
       selector (Rs2). */
    __ASM volatile("kslra8 %[rd], %[rs1], %[rs2]"
                   : [rd] "=r"(rd)
                   : [rs1] "r"(a), [rs2] "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.59.1. KSLRA8 ===== */

/* ===== Inline Function Start for 3.59.2. KSLRA8.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
 * \brief KSLRA8.u (SIMD 8-bit Shift Left Logical with Saturation or Rounding Shift Right Arithmetic)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KSLRA8 Rd, Rs1, Rs2
 * KSLRA8.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit elements logical left (positive) or arithmetic right (negative) shift operation with
 * Q7 saturation for the left shift. The `.u` form performs additional rounding up operations for the
 * right shift.
 *
 * **Description**:\n
 * The 8-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
 * based on the value of Rs2[3:0]. Rs2[3:0] is in the signed range of [-2^3, 2^3-1]. A positive Rs2[3:0] means
 * logical left shift and a negative Rs2[3:0] means arithmetic right shift. The shift amount is the
 * absolute value of Rs2[3:0]. However, the behavior of `Rs2[3:0]==-2^3 (0x8)` is defined to be
 * equivalent to the behavior of `Rs2[3:0]==-(2^3-1) (0x9)`.
 * The left-shifted results are saturated to the 8-bit signed integer range of [-2^7, 2^7-1]. For the `.u` form
 * of the instruction, the right-shifted results are added a 1 to the most significant discarded bit
 * position for rounding effect. After the shift, saturation, or rounding, the final results are written to
 * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:4] will not affect
 * this instruction.
 *
 * **Operations**:\n
 * ~~~
 * if (Rs2[3:0] < 0) {
 *   sa = -Rs2[3:0];
 *   sa = (sa == 8)? 7 : sa;
 *   if (`.u` form) {
 *     res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1;
 *     Rd.B[x] = res[7:0];
 *   } else {
 *     Rd.B[x] = SE8(Rs1.B[x][7:sa]);
 *   }
 * } else {
 *   sa = Rs2[2:0];
 *   res[(7+sa):0] = Rs1.B[x] <<(logic) sa;
 *   if (res > (2^7)-1) {
 *     res[7:0] = 0x7f; OV = 1;
 *   } else if (res < -2^7) {
 *     res[7:0] = 0x80; OV = 1;
 *   }
 *   Rd.B[x] = res[7:0];
 * }
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KSLRA8_U(unsigned long a, int b)
{
    unsigned long rd;

    /* `.u` (rounding) form: a carries the packed 8-bit elements (Rs1);
       b is the signed shift selector (Rs2). */
    __ASM volatile("kslra8.u %[rd], %[rs1], %[rs2]"
                   : [rd] "=r"(rd)
                   : [rs1] "r"(a), [rs2] "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.59.2. KSLRA8.u ===== */

/* ===== Inline Function Start for 3.60.1. KSLRA16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
 * \brief KSLRA16 (SIMD 16-bit Shift Left Logical with Saturation or Shift Right Arithmetic)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KSLRA16 Rd, Rs1, Rs2
 * KSLRA16.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit elements logical left (positive) or arithmetic right (negative) shift operation with
 * Q15 saturation for the left shift. The `.u` form performs additional rounding up operations for the
 * right shift.
 *
 * **Description**:\n
 * The 16-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
 * based on the value of Rs2[4:0]. Rs2[4:0] is in the signed range of [-2^4, 2^4-1]. A positive Rs2[4:0] means
 * logical left shift and a negative Rs2[4:0] means arithmetic right shift. The shift amount is the
 * absolute value of Rs2[4:0]. However, the behavior of `Rs2[4:0]==-2^4 (0x10)` is defined to be
 * equivalent to the behavior of `Rs2[4:0]==-(2^4-1) (0x11)`.
 * The left-shifted results are saturated to the 16-bit signed integer range of [-2^15, 2^15-1]. For the `.u`
 * form of the instruction, the right-shifted results are added a 1 to the most significant discarded bit
 * position for rounding effect. After the shift, saturation, or rounding, the final results are written to
 * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:5] will not affect
 * this instruction.
 *
 * **Operations**:\n
 * ~~~
 * if (Rs2[4:0] < 0) {
 *   sa = -Rs2[4:0];
 *   sa = (sa == 16)? 15 : sa;
 *   if (`.u` form) {
 *     res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1;
 *     Rd.H[x] = res[15:0];
 *   } else {
 *     Rd.H[x] = SE16(Rs1.H[x][15:sa]);
 *   }
 * } else {
 *   sa = Rs2[3:0];
 *   res[(15+sa):0] = Rs1.H[x] <<(logic) sa;
 *   if (res > (2^15)-1) {
 *     res[15:0] = 0x7fff; OV = 1;
 *   } else if (res < -2^15) {
 *     res[15:0] = 0x8000; OV = 1;
 *   }
 *   Rd.H[x] = res[15:0];
 * }
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KSLRA16(unsigned long a, int b)
{
    unsigned long rd;

    /* a carries the packed 16-bit elements (Rs1); b is the signed shift
       selector (Rs2). */
    __ASM volatile("kslra16 %[rd], %[rs1], %[rs2]"
                   : [rd] "=r"(rd)
                   : [rs1] "r"(a), [rs2] "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.60.1. KSLRA16 ===== */

/* ===== Inline Function Start for 3.60.2. KSLRA16.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
 * \brief KSLRA16.u (SIMD 16-bit Shift Left Logical with Saturation or Rounding Shift Right Arithmetic)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KSLRA16 Rd, Rs1, Rs2
 * KSLRA16.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit elements logical left (positive) or arithmetic right (negative) shift operation with
 * Q15 saturation for the left shift. The `.u` form performs additional rounding up operations for the
 * right shift.
 *
 * **Description**:\n
 * The 16-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
 * based on the value of Rs2[4:0]. Rs2[4:0] is in the signed range of [-2^4, 2^4-1]. A positive Rs2[4:0] means
 * logical left shift and a negative Rs2[4:0] means arithmetic right shift. The shift amount is the
 * absolute value of Rs2[4:0]. However, the behavior of `Rs2[4:0]==-2^4 (0x10)` is defined to be
 * equivalent to the behavior of `Rs2[4:0]==-(2^4-1) (0x11)`.
 * The left-shifted results are saturated to the 16-bit signed integer range of [-2^15, 2^15-1]. For the `.u`
 * form of the instruction, the right-shifted results are added a 1 to the most significant discarded bit
 * position for rounding effect. After the shift, saturation, or rounding, the final results are written to
 * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:5] will not affect
 * this instruction.
 *
 * **Operations**:\n
 * ~~~
 * if (Rs2[4:0] < 0) {
 *   sa = -Rs2[4:0];
 *   sa = (sa == 16)? 15 : sa;
 *   if (`.u` form) {
 *     res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1;
 *     Rd.H[x] = res[15:0];
 *   } else {
 *     Rd.H[x] = SE16(Rs1.H[x][15:sa]);
 *   }
 * } else {
 *   sa = Rs2[3:0];
 *   res[(15+sa):0] = Rs1.H[x] <<(logic) sa;
 *   if (res > (2^15)-1) {
 *     res[15:0] = 0x7fff; OV = 1;
 *   } else if (res < -2^15) {
 *     res[15:0] = 0x8000; OV = 1;
 *   }
 *   Rd.H[x] = res[15:0];
 * }
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KSLRA16_U(unsigned long a, int b)
{
    unsigned long rd;

    /* `.u` (rounding) form: a carries the packed 16-bit elements (Rs1);
       b is the signed shift selector (Rs2). */
    __ASM volatile("kslra16.u %[rd], %[rs1], %[rs2]"
                   : [rd] "=r"(rd)
                   : [rs1] "r"(a), [rs2] "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.60.2. KSLRA16.u ===== */

/* ===== Inline Function Start for 3.61. KSLRAW ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
 * \brief KSLRAW (Shift Left Logical with Q31 Saturation or Shift Right Arithmetic)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * KSLRAW Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Perform a logical left (positive) or arithmetic right (negative) shift operation with Q31
 * saturation for the left shift on a 32-bit data.
 *
 * **Description**:\n
 * The lower 32-bit content of Rs1 is left-shifted logically or right-shifted arithmetically
 * based on the value of Rs2[5:0]. Rs2[5:0] is in the signed range of [-2^5, 2^5-1]. A positive Rs2[5:0] means
 * logical left shift and a negative Rs2[5:0] means arithmetic right shift. The shift amount is the
 * absolute value of Rs2[5:0] clamped to the actual shift range of [0, 31].
 * The left-shifted result is saturated to the 32-bit signed integer range of [-2^31, 2^31-1]. After the shift
 * operation, the final result is bit-31 sign-extended and written to Rd. If any saturation happens, this
 * instruction sets the OV flag. The value of Rs2[31:6] will not affect the operation of this instruction.
 *
 * **Operations**:\n
 * ~~~
 * if (Rs2[5:0] < 0) {
 *   sa = -Rs2[5:0];
 *   sa = (sa == 32)? 31 : sa;
 *   res[31:0] = Rs1.W[0] >>(arith) sa;
 * } else {
 *   sa = Rs2[5:0];
 *   tmp = Rs1.W[0] <<(logic) sa;
 *   if (tmp > (2^31)-1) {
 *     res[31:0] = (2^31)-1;
 *     OV = 1;
 *   } else if (tmp < -2^31) {
 *     res[31:0] = -2^31;
 *     OV = 1;
 *   } else {
 *     res[31:0] = tmp[31:0];
 *   }
 * }
 * Rd = res[31:0]; // RV32
 * Rd = SE64(res[31:0]); // RV64
 * ~~~
 *
 * \param [in]  a    int type of value stored in a
 * \param [in]  b    int type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KSLRAW(int a, int b)
{
    long rd;

    /* a is the 32-bit value to shift (Rs1); b is the signed shift
       selector (Rs2). */
    __ASM volatile("kslraw %[rd], %[rs1], %[rs2]"
                   : [rd] "=r"(rd)
                   : [rs1] "r"(a), [rs2] "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.61. KSLRAW ===== */

/* ===== Inline Function Start for 3.62. KSLRAW.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
 * \brief KSLRAW.u (Shift Left Logical with Q31 Saturation or Rounding Shift Right Arithmetic)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * KSLRAW.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Perform a logical left (positive) or arithmetic right (negative) shift operation with Q31
 * saturation for the left shift and a rounding up operation for the right shift on a 32-bit data.
 *
 * **Description**:\n
 * The lower 32-bit content of Rs1 is left-shifted logically or right-shifted arithmetically
 * based on the value of Rs2[5:0]. Rs2[5:0] is in the signed range of [-2^5, 2^5-1]. A positive Rs2[5:0] means
 * logical left shift and a negative Rs2[5:0] means arithmetic right shift. The shift amount is the
 * absolute value of Rs2[5:0] clamped to the actual shift range of [0, 31].
 * The left-shifted result is saturated to the 32-bit signed integer range of [-2^31, 2^31-1]. The right-shifted
 * result is added a 1 to the most significant discarded bit position for rounding effect. After the shift,
 * saturation, or rounding, the final result is bit-31 sign-extended and written to Rd. If any saturation
 * happens, this instruction sets the OV flag. The value of Rs2[31:6] will not affect the operation of this
 * instruction.
 *
 * **Operations**:\n
 * ~~~
 * if (Rs2[5:0] < 0) {
 *   sa = -Rs2[5:0];
 *   sa = (sa == 32)? 31 : sa;
 *   res[31:-1] = SE33(Rs1[31:(sa-1)]) + 1;
 *   rst[31:0] = res[31:0];
 * } else {
 *   sa = Rs2[5:0];
 *   tmp = Rs1.W[0] <<(logic) sa;
 *   if (tmp > (2^31)-1) {
 *     rst[31:0] = (2^31)-1;
 *     OV = 1;
 *   } else if (tmp < -2^31) {
 *     rst[31:0] = -2^31;
 *     OV = 1;
 *   } else {
 *     rst[31:0] = tmp[31:0];
 *   }
 * }
 * Rd = rst[31:0]; // RV32
 * Rd = SE64(rst[31:0]); // RV64
 * ~~~
 *
 * \param [in]  a    int type of value stored in a
 * \param [in]  b    int type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KSLRAW_U(int a, int b)
{
    long rd;

    /* `.u` (rounding) form: a is the 32-bit value to shift (Rs1); b is
       the signed shift selector (Rs2). */
    __ASM volatile("kslraw.u %[rd], %[rs1], %[rs2]"
                   : [rd] "=r"(rd)
                   : [rs1] "r"(a), [rs2] "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.62. KSLRAW.u ===== */

/* ===== Inline Function Start for 3.63. KSTAS16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief KSTAS16 (SIMD 16-bit Signed Saturating Straight Addition & Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KSTAS16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element saturating addition and 16-bit signed integer element
 * saturating subtraction in a 32-bit chunk simultaneously. Operands are from corresponding
 * positions in 32-bit chunks.
 *
 * **Description**:\n
 * This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in
 * Rs1 with the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs2; at the same time, it
 * subtracts the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs2 from the 16-bit signed
 * integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the Q15 number
 * range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated
 * results are written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit chunks in Rd for
 * subtraction.
 *
 * **Operations**:\n
 * ~~~
 * res1 = Rs1.W[x][31:16] + Rs2.W[x][31:16];
 * res2 = Rs1.W[x][15:0] - Rs2.W[x][15:0];
 * for (res in [res1, res2]) {
 *   if (res > (2^15)-1) {
 *     res = (2^15)-1;
 *     OV = 1;
 *   } else if (res < -2^15) {
 *     res = -2^15;
 *     OV = 1;
 *   }
 * }
 * Rd.W[x][31:16] = res1;
 * Rd.W[x][15:0] = res2;
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KSTAS16(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* a and b hold the packed 16-bit operands (Rs1 and Rs2). */
    __ASM volatile("kstas16 %[rd], %[rs1], %[rs2]"
                   : [rd] "=r"(rd)
                   : [rs1] "r"(a), [rs2] "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.63. KSTAS16 ===== */

/* ===== Inline Function Start for 3.64. KSTSA16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief KSTSA16 (SIMD 16-bit Signed Saturating Straight Subtraction & Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KSTSA16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element saturating subtraction and 16-bit signed integer element
 * saturating addition in a 32-bit chunk simultaneously. Operands are from corresponding positions in
 * 32-bit chunks.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit signed integer element in [31:16] of 32-bit chunks
 * in Rs2 from the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1; at the same time, it
 * adds the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs2 with the 16-bit signed integer
 * element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the Q15 number range (-2^15
 * <= Q15 <= 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated results are
 * written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of 32-bit chunks in Rd for
 * addition.
 *
 * **Operations**:\n
 * ~~~
 * res1 = Rs1.W[x][31:16] - Rs2.W[x][31:16];
 * res2 = Rs1.W[x][15:0] + Rs2.W[x][15:0];
 * for (res in [res1, res2]) {
 *   if (res > (2^15)-1) {
 *     res = (2^15)-1;
 *     OV = 1;
 *   } else if (res < -2^15) {
 *     res = -2^15;
 *     OV = 1;
 *   }
 * }
 * Rd.W[x][31:16] = res1;
 * Rd.W[x][15:0] = res2;
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KSTSA16(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* a and b hold the packed 16-bit operands (Rs1 and Rs2). */
    __ASM volatile("kstsa16 %[rd], %[rs1], %[rs2]"
                   : [rd] "=r"(rd)
                   : [rs1] "r"(a), [rs2] "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.64. KSTSA16 ===== */

/* ===== Inline Function Start for 3.65. KSUB8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
 * \brief KSUB8 (SIMD 8-bit Signed Saturating Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KSUB8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit signed elements saturating subtractions simultaneously.
 *
 * **Description**:\n
 * This instruction subtracts the 8-bit signed integer elements in Rs2 from the 8-bit
 * signed integer elements in Rs1. If any of the results are beyond the Q7 number range (-2^7 <= Q7 <=
 * 2^7-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rs1.B[x] - Rs2.B[x];
 * if (res[x] > (2^7)-1) {
 *   res[x] = (2^7)-1;
 *   OV = 1;
 * } else if (res[x] < -2^7) {
 *   res[x] = -2^7;
 *   OV = 1;
 * }
 * Rd.B[x] = res[x];
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KSUB8(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* a is the minuend (Rs1) and b the subtrahend (Rs2), both as packed
       8-bit elements. */
    __ASM volatile("ksub8 %[rd], %[rs1], %[rs2]"
                   : [rd] "=r"(rd)
                   : [rs1] "r"(a), [rs2] "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.65. KSUB8 ===== */

/* ===== Inline Function Start for 3.66. KSUB16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief KSUB16 (SIMD 16-bit Signed Saturating Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KSUB16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer elements saturating subtractions simultaneously.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit signed integer elements in Rs2 from the 16-bit
 * signed integer elements in Rs1. If any of the results are beyond the Q15 number range (-2^15 <= Q15 <=
 * 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to
 * Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rs1.H[x] - Rs2.H[x];
 * if (res[x] > (2^15)-1) {
 *   res[x] = (2^15)-1;
 *   OV = 1;
 * } else if (res[x] < -2^15) {
 *   res[x] = -2^15;
 *   OV = 1;
 * }
 * Rd.H[x] = res[x];
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KSUB16(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* a is the minuend (Rs1) and b the subtrahend (Rs2), both as packed
       16-bit elements. */
    __ASM volatile("ksub16 %[rd], %[rs1], %[rs2]"
                   : [rd] "=r"(rd)
                   : [rs1] "r"(a), [rs2] "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.66. KSUB16 ===== */

/* ===== Inline Function Start for 3.67. KSUB64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
 * \brief KSUB64 (64-bit Signed Saturating Subtraction)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * KSUB64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Perform a 64-bit signed integer subtraction. The result is saturated to the Q63 range.
 *
 * **RV32 Description**:\n
 * This instruction subtracts the 64-bit signed integer of an even/odd pair of
 * registers specified by Rs2(4,1) from the 64-bit signed integer of an even/odd pair of registers
 * specified by Rs1(4,1). If the 64-bit result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is
 * saturated to the range and the OV bit is set to 1. The saturated result is then written to an even/odd
 * pair of registers specified by Rd(4,1).
 * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
 * register of the pair contains the low 32-bit of the operand.
 *
 * **RV64 Description**:\n
 * This instruction subtracts the 64-bit signed integer of Rs2 from the 64-bit signed
 * integer of Rs1. If the 64-bit result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated
 * to the range and the OV bit is set to 1. The saturated result is then written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * RV32:
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
 * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
 * result = R[a_H].R[a_L] - R[b_H].R[b_L];
 * if (result > (2^63)-1) {
 *   result = (2^63)-1; OV = 1;
 * } else if (result < -2^63) {
 *   result = -2^63; OV = 1;
 * }
 * R[t_H].R[t_L] = result;
 * RV64:
 * result = Rs1 - Rs2;
 * if (result > (2^63)-1) {
 *   result = (2^63)-1; OV = 1;
 * } else if (result < -2^63) {
 *   result = -2^63; OV = 1;
 * }
 * Rd = result;
 * ~~~
 *
 * \param [in]  a    long long type of value stored in a
 * \param [in]  b    long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_KSUB64(long long a, long long b)
{
    long long rd;

    /* 64-bit signed operands: a is the minuend (Rs1), b the subtrahend
       (Rs2). */
    __ASM volatile("ksub64 %[rd], %[rs1], %[rs2]"
                   : [rd] "=r"(rd)
                   : [rs1] "r"(a), [rs2] "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.67. KSUB64 ===== */

/* ===== Inline Function Start for 3.68. KSUBH ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
 * \brief KSUBH (Signed Subtraction with Q15 Saturation)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * KSUBH Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Subtract the signed lower 32-bit content of two registers with Q15 saturation.
 *
 * **Description**:\n
 * The signed lower 32-bit content of Rs2 is subtracted from the signed lower 32-bit
 * content of Rs1. And the result is saturated to the 16-bit signed integer range of [-2^15, 2^15-1] and then
 * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag.
 *
 * **Operations**:\n
 * ~~~
 * tmp = Rs1.W[0] - Rs2.W[0];
 * if (tmp > (2^15)-1) {
 *   res = (2^15)-1;
 *   OV = 1;
 * } else if (tmp < -2^15) {
 *   res = -2^15;
 *   OV = 1;
 * } else {
 *   res = tmp;
 * }
 * Rd = SE(res[15:0]);
 * ~~~
 *
 * \param [in]  a    int type of value stored in a
 * \param [in]  b    int type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KSUBH(int a, int b)
{
    long rd;

    /* a is the minuend (Rs1), b the subtrahend (Rs2). */
    __ASM volatile("ksubh %[rd], %[rs1], %[rs2]"
                   : [rd] "=r"(rd)
                   : [rs1] "r"(a), [rs2] "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.68. KSUBH ===== */

/* ===== Inline Function Start for 3.69. KSUBW ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
 * \brief KSUBW (Signed Subtraction with Q31 Saturation)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * KSUBW Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Subtract the signed lower 32-bit content of two registers with Q31 saturation.
 *
 * **Description**:\n
 * The signed lower 32-bit content of Rs2 is subtracted from the signed lower 32-bit
 * content of Rs1. And the result is saturated to the 32-bit signed integer range of [-2^31, 2^31-1] and then
 * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag.
 *
 * **Operations**:\n
 * ~~~
 * tmp = Rs1.W[0] - Rs2.W[0];
 * if (tmp > (2^31)-1) {
 *   res = (2^31)-1;
 *   OV = 1;
 * } else if (tmp < -2^31) {
 *   res = -2^31;
 *   OV = 1;
 * } else {
 *   res = tmp;
 * }
 * Rd = res[31:0]; // RV32
 * Rd = SE(res[31:0]); // RV64
 * ~~~
 *
 * \param [in]  a    int type of value stored in a
 * \param [in]  b    int type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KSUBW(int a, int b)
{
    long rd;

    /* a is the minuend (Rs1), b the subtrahend (Rs2). */
    __ASM volatile("ksubw %[rd], %[rs1], %[rs2]"
                   : [rd] "=r"(rd)
                   : [rs1] "r"(a), [rs2] "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.69. KSUBW ===== */

/* ===== Inline Function Start for 3.70.1. KWMMUL ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
 * \brief KWMMUL (SIMD Saturating MSW Signed Multiply Word & Double)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KWMMUL Rd, Rs1, Rs2
 * KWMMUL.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit integer elements of two registers, shift the results left 1-bit,
 * saturate, and write the most significant 32-bit results to a register. The `.u` form additionally
 * rounds up the multiplication results from the most significant discarded bit.
 *
 * **Description**:\n
 * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2. It then shifts
 * the multiplication results one bit to the left and takes the most significant 32-bit results. If the
 * shifted result is greater than 2^31-1, it is saturated to 2^31-1 and the OV flag is set to 1. The final element
 * result is written to Rd. The 32-bit elements of Rs1 and Rs2 are treated as signed integers. The `.u`
 * form of the instruction additionally rounds up the 64-bit multiplication results by adding a 1 to bit
 * 30 before the shift and saturation operations.
 *
 * **Operations**:\n
 * ~~~
 * if ((0x80000000 != Rs1.W[x]) | (0x80000000 != Rs2.W[x])) {
 *   Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
 *   if (`.u` form) {
 *     Round[x][33:0] = Mres[x][63:30] + 1;
 *     Rd.W[x] = Round[x][32:1];
 *   } else {
 *     Rd.W[x] = Mres[x][62:31];
 *   }
 * } else {
 *   Rd.W[x] = 0x7fffffff;
 *   OV = 1;
 * }
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KWMMUL(long a, long b)
{
    long rd;

    /* a and b are the two multiplicands (Rs1 and Rs2). */
    __ASM volatile("kwmmul %[rd], %[rs1], %[rs2]"
                   : [rd] "=r"(rd)
                   : [rs1] "r"(a), [rs2] "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.70.1. KWMMUL ===== */

/* ===== Inline Function Start for 3.70.2. KWMMUL.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
 * \brief KWMMUL.u (SIMD Saturating MSW Signed Multiply Word & Double with Rounding)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KWMMUL Rd, Rs1, Rs2
 * KWMMUL.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit integer elements of two registers, shift the results left 1-bit,
 * saturate, and write the most significant 32-bit results to a register. The `.u` form additionally
 * rounds up the multiplication results from the most significant discarded bit.
 *
 * **Description**:\n
 * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2. It then shifts
 * the multiplication results one bit to the left and takes the most significant 32-bit results. If the
 * shifted result is greater than 2^31-1, it is saturated to 2^31-1 and the OV flag is set to 1. The final element
 * result is written to Rd. The 32-bit elements of Rs1 and Rs2 are treated as signed integers. The `.u`
 * form of the instruction additionally rounds up the 64-bit multiplication results by adding a 1 to bit
 * 30 before the shift and saturation operations.
 *
 * **Operations**:\n
 * ~~~
 * if ((0x80000000 != Rs1.W[x]) | (0x80000000 != Rs2.W[x])) {
 *   Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
 *   if (`.u` form) {
 *     Round[x][33:0] = Mres[x][63:30] + 1;
 *     Rd.W[x] = Round[x][32:1];
 *   } else {
 *     Rd.W[x] = Mres[x][62:31];
 *   }
 * } else {
 *   Rd.W[x] = 0x7fffffff;
 *   OV = 1;
 * }
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KWMMUL_U(long a, long b)
{
    long rd;

    /* `.u` (rounding) form: a and b are the two multiplicands (Rs1 and
       Rs2). */
    __ASM volatile("kwmmul.u %[rd], %[rs1], %[rs2]"
                   : [rd] "=r"(rd)
                   : [rs1] "r"(a), [rs2] "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.70.2. KWMMUL.u ===== */

/* ===== Inline Function Start for 3.71. MADDR32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
 * \brief MADDR32 (Multiply and Add to 32-Bit Word)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * MADDR32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the 32-bit contents of two registers and add the lower 32-bit multiplication result
 * to the 32-bit content of a destination register. Write the final result back to the destination register.
 *
 * **Description**:\n
 * This instruction multiplies the lower 32-bit content of Rs1 with that of Rs2. It adds the
 * lower 32-bit multiplication result to the lower 32-bit content of Rd and writes the final result (RV32)
 * or sign-extended result (RV64) back to Rd. The contents of Rs1 and Rs2 can be either signed or
 * unsigned integers.
 *
 * **Operations**:\n
 * ~~~
 * RV32:
 * Mresult = Rs1 * Rs2;
 * Rd = Rd + Mresult.W[0];
 * RV64:
 * Mresult = Rs1.W[0] * Rs2.W[0];
 * tres[31:0] = Rd.W[0] + Mresult.W[0];
 * Rd = SE64(tres[31:0]);
 * ~~~
 *
 * \param [in]  t    unsigned long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_MADDR32(unsigned long t, unsigned long a, unsigned long b)
{
    /* t is bound as a read-write operand: it supplies the value of Rd going
       in and receives the updated value coming out. */
    __ASM volatile("maddr32 %[acc], %[rs1], %[rs2]"
                   : [acc] "+r"(t)
                   : [rs1] "r"(a), [rs2] "r"(b));
    return t;
}
/* ===== Inline Function End for 3.71. MADDR32 ===== */

/* ===== Inline Function Start for 3.72. MAXW ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
 * \brief MAXW (32-bit Signed Word Maximum)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * MAXW Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Get the larger value from the 32-bit contents of two general registers.
 *
 * **Description**:\n
 * This instruction compares two signed 32-bit integers stored in Rs1 and Rs2, picks the
 * larger value as the result, and writes the result to Rd.
 *
 * **Operations**:\n
 * ~~~
 * if (Rs1.W[0] >= Rs2.W[0]) {
 *   Rd = SE(Rs1.W[0]);
 * } else {
 *   Rd = SE(Rs2.W[0]);
 * }
 * ~~~
 *
 * \param [in]  a    int type of value stored in a
 * \param [in]  b    int type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_MAXW(int a, int b)
{
    long larger; /* destination register: the larger of the two signed words */

    __ASM volatile(
        "maxw %0, %1, %2"
        : "=r"(larger)
        : "r"(a), "r"(b)
    );
    return larger;
}
/* ===== Inline Function End for 3.72. MAXW ===== */

/* ===== Inline Function Start for 3.73. MINW ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
 * \brief MINW (32-bit Signed Word Minimum)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * MINW Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Get the smaller value from the 32-bit contents of two general registers.
 *
 * **Description**:\n
 * This instruction compares two signed 32-bit integers stored in Rs1 and Rs2, picks the
 * smaller value as the result, and writes the result to Rd.
 *
 * **Operations**:\n
 * ~~~
 * if (Rs1.W[0] >= Rs2.W[0]) { Rd = SE(Rs2.W[0]); } else { Rd = SE(Rs1.W[0]); }
 * ~~~
 *
 * \param [in]  a    int type of value stored in a
 * \param [in]  b    int type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_MINW(int a, int b)
{
    long smaller; /* destination register: the smaller of the two signed words */

    __ASM volatile(
        "minw %0, %1, %2"
        : "=r"(smaller)
        : "r"(a), "r"(b)
    );
    return smaller;
}
/* ===== Inline Function End for 3.73. MINW ===== */

/* ===== Inline Function Start for 3.74. MSUBR32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
 * \brief MSUBR32 (Multiply and Subtract from 32-Bit Word)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * MSUBR32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the 32-bit contents of two registers and subtract the lower 32-bit multiplication
 * result from the 32-bit content of a destination register. Write the final result back to the destination
 * register.
 *
 * **Description**:\n
 * This instruction multiplies the lower 32-bit content of Rs1 with that of Rs2, subtracts
 * the lower 32-bit multiplication result from the lower 32-bit content of Rd, then writes the final
 * result (RV32) or sign-extended result (RV64) back to Rd. The contents of Rs1 and Rs2 can be either
 * signed or unsigned integers.
 *
 * **Operations**:\n
 * ~~~
 * RV32:
 * Mresult = Rs1 * Rs2;
 * Rd = Rd - Mresult.W[0];
 * RV64:
 * Mresult = Rs1.W[0] * Rs2.W[0];
 * tres[31:0] = Rd.W[0] - Mresult.W[0];
 * Rd = SE64(tres[31:0]);
 * ~~~
 *
 * \param [in]  t    unsigned long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_MSUBR32(unsigned long t, unsigned long a, unsigned long b)
{
    /* The destination is read-write ("+r"): it supplies the minuend and
     * receives the difference after the low 32 bits of a * b are subtracted. */
    unsigned long acc = t;

    __ASM volatile(
        "msubr32 %0, %1, %2"
        : "+r"(acc)
        : "r"(a), "r"(b)
    );
    return acc;
}
/* ===== Inline Function End for 3.74. MSUBR32 ===== */

/* ===== Inline Function Start for 3.75. MULR64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
 * \brief MULR64 (Multiply Word Unsigned to 64-bit Data)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * MULR64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the 32-bit unsigned integer contents of two registers and write the 64-bit result.
 *
 * **RV32 Description**:\n
 * This instruction multiplies the 32-bit content of Rs1 with that of Rs2 and writes the 64-bit
 * multiplication result to an even/odd pair of registers containing Rd. Rd(4,1) index d determines the
 * even/odd pair group of the two registers. Specifically, the register pair includes register 2d and
 * 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
 * of the pair contains the low 32-bit of the result.
 * The lower 32-bit contents of Rs1 and Rs2 are treated as unsigned integers.
 *
 * **RV64 Description**:\n
 * This instruction multiplies the lower 32-bit content of Rs1 with that of Rs2 and writes the 64-bit
 * multiplication result to Rd.
 * The lower 32-bit contents of Rs1 and Rs2 are treated as unsigned integers.
 *
 * **Operations**:\n
 * ~~~
 * RV32:
 * Mresult = CONCAT(1`b0,Rs1) u* CONCAT(1`b0,Rs2);
 * R[Rd(4,1).1(0)][31:0] = Mresult[63:32];
 * R[Rd(4,1).0(0)][31:0] = Mresult[31:0];
 * RV64:
 * Mresult = CONCAT(1`b0,Rs1.W[0]) u* CONCAT(1`b0,Rs2.W[0]);
 * Rd = Mresult[63:0];
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_MULR64(unsigned long a, unsigned long b)
{
    unsigned long long product; /* 64-bit unsigned multiplication result */

    __ASM volatile(
        "mulr64 %0, %1, %2"
        : "=r"(product)
        : "r"(a), "r"(b)
    );
    return product;
}
/* ===== Inline Function End for 3.75. MULR64 ===== */

/* ===== Inline Function Start for 3.76. MULSR64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
 * \brief MULSR64 (Multiply Word Signed to 64-bit Data)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * MULSR64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the 32-bit signed integer contents of two registers and write the 64-bit result.
 *
 * **RV32 Description**:\n
 * This instruction multiplies the lower 32-bit content of Rs1 with the lower 32-bit content of Rs2 and
 * writes the 64-bit multiplication result to an even/odd pair of registers containing Rd. Rd(4,1) index d
 * determines the even/odd pair group of the two registers. Specifically, the register pair includes
 * register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
 * of the pair contains the low 32-bit of the result.
 * The lower 32-bit contents of Rs1 and Rs2 are treated as signed integers.
 *
 * **RV64 Description**:\n
 * This instruction multiplies the lower 32-bit content of Rs1 with the lower 32-bit content of Rs2 and
 * writes the 64-bit multiplication result to Rd.
 * The lower 32-bit contents of Rs1 and Rs2 are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * RV32:
 * Mresult = Rs1 s* Rs2;
 * R[Rd(4,1).1(0)][31:0] = Mresult[63:32];
 * R[Rd(4,1).0(0)][31:0] = Mresult[31:0];
 * RV64:
 * Mresult = Rs1.W[0] s* Rs2.W[0];
 * Rd = Mresult[63:0];
 * ~~~
 *
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_MULSR64(long a, long b)
{
    long long product; /* 64-bit signed multiplication result */

    __ASM volatile(
        "mulsr64 %0, %1, %2"
        : "=r"(product)
        : "r"(a), "r"(b)
    );
    return product;
}
/* ===== Inline Function End for 3.76. MULSR64 ===== */

/* ===== Inline Function Start for 3.77. PBSAD ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
 * \brief PBSAD (Parallel Byte Sum of Absolute Difference)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * PBSAD Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Calculate the sum of absolute difference of unsigned 8-bit data elements.
 *
 * **Description**:\n
 * This instruction subtracts the unsigned 8-bit elements of Rs2 from those of Rs1. Then
 * it adds the absolute value of each difference together and writes the result to Rd.
 *
 * **Operations**:\n
 * ~~~
 * absdiff[x] = ABS(Rs1.B[x] - Rs2.B[x]);
 * Rd = SUM(absdiff[x]);
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_PBSAD(unsigned long a, unsigned long b)
{
    unsigned long sad; /* sum of the per-byte absolute differences of a and b */

    __ASM volatile(
        "pbsad %0, %1, %2"
        : "=r"(sad)
        : "r"(a), "r"(b)
    );
    return sad;
}
/* ===== Inline Function End for 3.77. PBSAD ===== */

/* ===== Inline Function Start for 3.78. PBSADA ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
 * \brief PBSADA (Parallel Byte Sum of Absolute Difference Accum)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * PBSADA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Calculate the sum of absolute difference of unsigned 8-bit data elements and
 * accumulate it into a register.
 *
 * **Description**:\n
 * This instruction subtracts the unsigned 8-bit elements of Rs2 from those of Rs1. It
 * then adds the absolute value of each difference together along with the content of Rd and writes the
 * accumulated result back to Rd.
 *
 * **Operations**:\n
 * ~~~
 * absdiff[x] = ABS(Rs1.B[x] - Rs2.B[x]);
 * Rd = Rd + SUM(absdiff[x]);
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  t    unsigned long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_PBSADA(unsigned long t, unsigned long a, unsigned long b)
{
    /* The destination is read-write ("+r"): it carries the running total in
     * and receives the total plus the new sum of absolute differences. */
    unsigned long acc = t;

    __ASM volatile(
        "pbsada %0, %1, %2"
        : "+r"(acc)
        : "r"(a), "r"(b)
    );
    return acc;
}
/* ===== Inline Function End for 3.78. PBSADA ===== */

/* ===== Inline Function Start for 3.79.1. PKBB16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_PACK
 * \brief PKBB16 (Pack Two 16-bit Data from Both Bottom Half)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * PKBB16 Rd, Rs1, Rs2
 * PKBT16 Rd, Rs1, Rs2
 * PKTT16 Rd, Rs1, Rs2
 * PKTB16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Pack 16-bit data from 32-bit chunks in two registers.
 * * PKBB16: bottom.bottom
 * * PKBT16 bottom.top
 * * PKTT16 top.top
 * * PKTB16 top.bottom
 *
 * **Description**:\n
 * (PKBB16) moves Rs1.W[x][15:0] to Rd.W[x][31:16] and moves Rs2.W[x] [15:0] to
 * Rd.W[x] [15:0].
 * (PKBT16) moves Rs1.W[x] [15:0] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
 * (PKTT16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
 * (PKTB16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0].
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][15:0]); // PKBB16
 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][31:16]); // PKBT16
 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][15:0]); // PKTB16
 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][31:16]); // PKTT16
 * for RV32: x=0,
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_PKBB16(unsigned long a, unsigned long b)
{
    unsigned long packed; /* bottom half of a over bottom half of b, per 32-bit chunk */

    __ASM volatile(
        "pkbb16 %0, %1, %2"
        : "=r"(packed)
        : "r"(a), "r"(b)
    );
    return packed;
}
/* ===== Inline Function End for 3.79.1. PKBB16 ===== */

/* ===== Inline Function Start for 3.79.2. PKBT16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_PACK
 * \brief PKBT16 (Pack Two 16-bit Data from Bottom and Top Half)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * PKBB16 Rd, Rs1, Rs2
 * PKBT16 Rd, Rs1, Rs2
 * PKTT16 Rd, Rs1, Rs2
 * PKTB16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Pack 16-bit data from 32-bit chunks in two registers.
 * * PKBB16: bottom.bottom
 * * PKBT16 bottom.top
 * * PKTT16 top.top
 * * PKTB16 top.bottom
 *
 * **Description**:\n
 * (PKBB16) moves Rs1.W[x][15:0] to Rd.W[x][31:16] and moves Rs2.W[x] [15:0] to
 * Rd.W[x] [15:0].
 * (PKBT16) moves Rs1.W[x] [15:0] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
 * (PKTT16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
 * (PKTB16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0].
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][15:0]); // PKBB16
 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][31:16]); // PKBT16
 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][15:0]); // PKTB16
 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][31:16]); // PKTT16
 * for RV32: x=0,
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_PKBT16(unsigned long a, unsigned long b)
{
    unsigned long packed; /* bottom half of a over top half of b, per 32-bit chunk */

    __ASM volatile(
        "pkbt16 %0, %1, %2"
        : "=r"(packed)
        : "r"(a), "r"(b)
    );
    return packed;
}
/* ===== Inline Function End for 3.79.2. PKBT16 ===== */

/* ===== Inline Function Start for 3.79.3. PKTT16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_PACK
 * \brief PKTT16 (Pack Two 16-bit Data from Both Top Half)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * PKBB16 Rd, Rs1, Rs2
 * PKBT16 Rd, Rs1, Rs2
 * PKTT16 Rd, Rs1, Rs2
 * PKTB16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Pack 16-bit data from 32-bit chunks in two registers.
 * * PKBB16: bottom.bottom
 * * PKBT16 bottom.top
 * * PKTT16 top.top
 * * PKTB16 top.bottom
 *
 * **Description**:\n
 * (PKBB16) moves Rs1.W[x][15:0] to Rd.W[x][31:16] and moves Rs2.W[x] [15:0] to
 * Rd.W[x] [15:0].
 * (PKBT16) moves Rs1.W[x] [15:0] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
 * (PKTT16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
 * (PKTB16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0].
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][15:0]); // PKBB16
 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][31:16]); // PKBT16
 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][15:0]); // PKTB16
 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][31:16]); // PKTT16
 * for RV32: x=0,
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_PKTT16(unsigned long a, unsigned long b)
{
    unsigned long packed; /* top half of a over top half of b, per 32-bit chunk */

    __ASM volatile(
        "pktt16 %0, %1, %2"
        : "=r"(packed)
        : "r"(a), "r"(b)
    );
    return packed;
}
/* ===== Inline Function End for 3.79.3. PKTT16 ===== */

/* ===== Inline Function Start for 3.79.4. PKTB16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_PACK
 * \brief PKTB16 (Pack Two 16-bit Data from Top and Bottom Half)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * PKBB16 Rd, Rs1, Rs2
 * PKBT16 Rd, Rs1, Rs2
 * PKTT16 Rd, Rs1, Rs2
 * PKTB16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Pack 16-bit data from 32-bit chunks in two registers.
 * * PKBB16: bottom.bottom
 * * PKBT16 bottom.top
 * * PKTT16 top.top
 * * PKTB16 top.bottom
 *
 * **Description**:\n
 * (PKBB16) moves Rs1.W[x][15:0] to Rd.W[x][31:16] and moves Rs2.W[x] [15:0] to
 * Rd.W[x] [15:0].
 * (PKBT16) moves Rs1.W[x] [15:0] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
 * (PKTT16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
 * (PKTB16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0].
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][15:0]); // PKBB16
 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][31:16]); // PKBT16
 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][15:0]); // PKTB16
 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][31:16]); // PKTT16
 * for RV32: x=0,
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_PKTB16(unsigned long a, unsigned long b)
{
    unsigned long packed; /* top half of a over bottom half of b, per 32-bit chunk */

    __ASM volatile(
        "pktb16 %0, %1, %2"
        : "=r"(packed)
        : "r"(a), "r"(b)
    );
    return packed;
}
/* ===== Inline Function End for 3.79.4. PKTB16 ===== */

/* ===== Inline Function Start for 3.80. RADD8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
 * \brief RADD8 (SIMD 8-bit Signed Halving Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * RADD8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit signed integer element additions simultaneously. The element results are halved
 * to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the 8-bit signed integer elements in Rs1 with the 8-bit signed
 * integer elements in Rs2. The results are first arithmetically right-shifted by 1 bit and then written to
 * Rd.
 *
 * **Examples**:\n
 * ~~~
 * * Rs1 = 0x7F, Rs2 = 0x7F, Rd = 0x7F
 * * Rs1 = 0x80, Rs2 = 0x80, Rd = 0x80
 * * Rs1 = 0x40, Rs2 = 0x80, Rd = 0xE0
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.B[x] = (Rs1.B[x] + Rs2.B[x]) s>> 1; for RV32: x=3...0, for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_RADD8(unsigned long a, unsigned long b)
{
    unsigned long halved; /* per-byte signed sums, each shifted right by one */

    __ASM volatile(
        "radd8 %0, %1, %2"
        : "=r"(halved)
        : "r"(a), "r"(b)
    );
    return halved;
}
/* ===== Inline Function End for 3.80. RADD8 ===== */

/* ===== Inline Function Start for 3.81. RADD16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief RADD16 (SIMD 16-bit Signed Halving Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * RADD16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element additions simultaneously. The results are halved to avoid
 * overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the 16-bit signed integer elements in Rs1 with the 16-bit signed
 * integer elements in Rs2. The results are first arithmetically right-shifted by 1 bit and then written to
 * Rd.
 *
 * **Examples**:\n
 * ~~~
 * * Rs1 = 0x7FFF, Rs2 = 0x7FFF, Rd = 0x7FFF
 * * Rs1 = 0x8000, Rs2 = 0x8000, Rd = 0x8000
 * * Rs1 = 0x4000, Rs2 = 0x8000, Rd = 0xE000
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = (Rs1.H[x] + Rs2.H[x]) s>> 1; for RV32: x=1...0, for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_RADD16(unsigned long a, unsigned long b)
{
    unsigned long halved; /* per-halfword signed sums, each shifted right by one */

    __ASM volatile(
        "radd16 %0, %1, %2"
        : "=r"(halved)
        : "r"(a), "r"(b)
    );
    return halved;
}
/* ===== Inline Function End for 3.81. RADD16 ===== */

/* ===== Inline Function Start for 3.82. RADD64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
 * \brief RADD64 (64-bit Signed Halving Addition)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * RADD64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Add two 64-bit signed integers. The result is halved to avoid overflow or saturation.
 *
 * **RV32 Description**:\n
 * This instruction adds the 64-bit signed integer of an even/odd pair of registers
 * specified by Rs1(4,1) with the 64-bit signed integer of an even/odd pair of registers specified by
 * Rs2(4,1). The 64-bit addition result is first arithmetically right-shifted by 1 bit and then written to an
 * even/odd pair of registers specified by Rd(4,1).
 * Rx(4,1), i.e., value d, determines the even/odd pair group of two registers. Specifically, the register
 * pair includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
 * of the pair contains the low 32-bit of the result.
 *
 * **RV64 Description**:\n
 * This instruction adds the 64-bit signed integer in Rs1 with the 64-bit signed
 * integer in Rs2. The 64-bit addition result is first arithmetically right-shifted by 1 bit and then
 * written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * RV32:
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
 * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
 * R[t_H].R[t_L] = (R[a_H].R[a_L] + R[b_H].R[b_L]) s>> 1;
 * RV64:
 * Rd = (Rs1 + Rs2) s>> 1;
 * ~~~
 *
 * \param [in]  a    long long type of value stored in a
 * \param [in]  b    long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_RADD64(long long a, long long b)
{
    long long halved; /* 64-bit signed sum shifted right arithmetically by one */

    __ASM volatile(
        "radd64 %0, %1, %2"
        : "=r"(halved)
        : "r"(a), "r"(b)
    );
    return halved;
}
/* ===== Inline Function End for 3.82. RADD64 ===== */

/* ===== Inline Function Start for 3.83. RADDW ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
 * \brief RADDW (32-bit Signed Halving Addition)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * RADDW Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Add 32-bit signed integers and the results are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the first 32-bit signed integer in Rs1 with the first 32-bit signed
 * integer in Rs2. The result is first arithmetically right-shifted by 1 bit and then sign-extended and
 * written to Rd.
 *
 * **Examples**:\n
 * ~~~
 * * Rs1 = 0x7FFFFFFF, Rs2 = 0x7FFFFFFF, Rd = 0x7FFFFFFF
 * * Rs1 = 0x80000000, Rs2 = 0x80000000, Rd = 0x80000000
 * * Rs1 = 0x40000000, Rs2 = 0x80000000, Rd = 0xE0000000
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * RV32:
 * Rd[31:0] = (Rs1[31:0] + Rs2[31:0]) s>> 1;
 * RV64:
 * resw[31:0] = (Rs1[31:0] + Rs2[31:0]) s>> 1;
 * Rd[63:0] = SE(resw[31:0]);
 * ~~~
 *
 * \param [in]  a    int type of value stored in a
 * \param [in]  b    int type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_RADDW(int a, int b)
{
    long halved; /* 32-bit signed sum shifted right by one, then sign-extended */

    __ASM volatile(
        "raddw %0, %1, %2"
        : "=r"(halved)
        : "r"(a), "r"(b)
    );
    return halved;
}
/* ===== Inline Function End for 3.83. RADDW ===== */

/* ===== Inline Function Start for 3.84. RCRAS16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief RCRAS16 (SIMD 16-bit Signed Halving Cross Addition & Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * RCRAS16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element addition and 16-bit signed integer element subtraction in
 * a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit chunks. The results
 * are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in
 * Rs1 with the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs2, and subtracts the 16-bit
 * signed integer element in [31:16] of 32-bit chunks in Rs2 from the 16-bit signed integer element in
 * [15:0] of 32-bit chunks in Rs1. The element results are first arithmetically right-shifted by 1 bit and
 * then written to [31:16] of 32-bit chunks in Rd and [15:0] of 32-bit chunks in Rd.
 *
 * **Examples**:\n
 * ~~~
 * Please see `RADD16` and `RSUB16` instructions.
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:16] = (Rs1.W[x][31:16] + Rs2.W[x][15:0]) s>> 1;
 * Rd.W[x][15:0] = (Rs1.W[x][15:0] - Rs2.W[x][31:16]) s>> 1;
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_RCRAS16(unsigned long a, unsigned long b)
{
    unsigned long rd; /* halved cross add (upper half) / subtract (lower half) */

    __ASM volatile(
        "rcras16 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );
    return rd;
}
/* ===== Inline Function End for 3.84. RCRAS16 ===== */

/* ===== Inline Function Start for 3.85. RCRSA16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief RCRSA16 (SIMD 16-bit Signed Halving Cross Subtraction & Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * RCRSA16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element subtraction and 16-bit signed integer element addition in
 * a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit chunks. The results
 * are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit signed integer element in [15:0] of 32-bit chunks
 * in Rs2 from the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1, and adds the 16-bit
 * signed integer element in [15:0] of 32-bit chunks in Rs1 with the 16-bit signed integer element in
 * [31:16] of 32-bit chunks in Rs2. The two results are first arithmetically right-shifted by 1 bit and
 * then written to [31:16] of 32-bit chunks in Rd and [15:0] of 32-bit chunks in Rd.
 *
 * **Examples**:\n
 * ~~~
 * Please see `RADD16` and `RSUB16` instructions.
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:16] = (Rs1.W[x][31:16] - Rs2.W[x][15:0]) s>> 1;
 * Rd.W[x][15:0] = (Rs1.W[x][15:0] + Rs2.W[x][31:16]) s>> 1;
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_RCRSA16(unsigned long a, unsigned long b)
{
    unsigned long rd; /* halved cross subtract (upper half) / add (lower half) */

    __ASM volatile(
        "rcrsa16 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );
    return rd;
}
/* ===== Inline Function End for 3.85. RCRSA16 ===== */

/* ===== Inline Function Start for 3.86. RDOV ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_OV_FLAG_SC
 * \brief RDOV (Read OV flag)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * RDOV Rd  # pseudo mnemonic
 * ~~~
 *
 * **Purpose**:\n
 * This pseudo instruction is an alias to `CSRR Rd, ucode` instruction which maps to the real
 * instruction of `CSRRS Rd, ucode, x0`.
 *
 *
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_RDOV(void)
{
    unsigned long csr_val; /* value delivered by the rdov pseudo instruction */

    /* No input operands: the instruction only produces a value. */
    __ASM volatile(
        "rdov %0"
        : "=r"(csr_val)
    );
    return csr_val;
}
/* ===== Inline Function End for 3.86. RDOV ===== */

/* ===== Inline Function Start for 3.87. RSTAS16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief RSTAS16 (SIMD 16-bit Signed Halving Straight Addition & Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * RSTAS16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element addition and 16-bit signed integer element subtraction in
 * a 32-bit chunk simultaneously. Operands are from corresponding positions in 32-bit chunks. The
 * results are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in
 * Rs1 with the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs2, and subtracts the 16-bit
 * signed integer element in [15:0] of 32-bit chunks in Rs2 from the 16-bit signed integer element in
 * [15:0] of 32-bit chunks in Rs1. The element results are first arithmetically right-shifted by 1 bit and
 * then written to [31:16] of 32-bit chunks in Rd and [15:0] of 32-bit chunks in Rd.
 *
 * **Examples**:\n
 * ~~~
 * Please see `RADD16` and `RSUB16` instructions.
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:16] = (Rs1.W[x][31:16] + Rs2.W[x][31:16]) s>> 1;
 * Rd.W[x][15:0] = (Rs1.W[x][15:0] - Rs2.W[x][15:0]) s>> 1;
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_RSTAS16(unsigned long a, unsigned long b)
{
    unsigned long rd; /* halved straight add (upper half) / subtract (lower half) */

    __ASM volatile(
        "rstas16 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );
    return rd;
}
/* ===== Inline Function End for 3.87. RSTAS16 ===== */

/* ===== Inline Function Start for 3.88. RSTSA16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief RSTSA16 (SIMD 16-bit Signed Halving Straight Subtraction & Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * RSTSA16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element subtraction and 16-bit signed integer element addition in
 * a 32-bit chunk simultaneously. Operands are from corresponding positions in 32-bit chunks. The
 * results are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit signed integer element in [31:16] of 32-bit chunks
 * in Rs2 from the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1, and adds the 16-bit
 * signed integer element in [15:0] of 32-bit chunks in Rs1 with the 16-bit signed integer element in
 * [15:0] of 32-bit chunks in Rs2. The two results are first arithmetically right-shifted by 1 bit and then
 * written to [31:16] of 32-bit chunks in Rd and [15:0] of 32-bit chunks in Rd.
 *
 * **Examples**:\n
 * ~~~
 * Please see `RADD16` and `RSUB16` instructions.
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:16] = (Rs1.W[x][31:16] - Rs2.W[x][31:16]) s>> 1;
 * Rd.W[x][15:0] = (Rs1.W[x][15:0] + Rs2.W[x][15:0]) s>> 1;
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_RSTSA16(unsigned long a, unsigned long b)
{
    unsigned long rd; /* halved straight subtract (upper half) / add (lower half) */

    __ASM volatile(
        "rstsa16 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );
    return rd;
}
/* ===== Inline Function End for 3.88. RSTSA16 ===== */

/* ===== Inline Function Start for 3.89. RSUB8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
 * \brief RSUB8 (SIMD 8-bit Signed Halving Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * RSUB8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit signed integer element subtractions simultaneously. The results are halved to
 * avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the 8-bit signed integer elements in Rs2 from the 8-bit
 * signed integer elements in Rs1. The results are first arithmetically right-shifted by 1 bit and then
 * written to Rd.
 *
 * **Examples**:\n
 * ~~~
 * * Rs1 = 0x7F, Rs2 = 0x80, Rd = 0x7F
 * * Rs1 = 0x80, Rs2 = 0x7F, Rd = 0x80
 * * Rs1= 0x80, Rs2 = 0x40, Rd = 0xA0
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.B[x] = (Rs1.B[x] - Rs2.B[x]) s>> 1;
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_RSUB8(unsigned long a, unsigned long b)
{
    unsigned long halved; /* per-byte signed differences (a - b), each shifted right by one */

    __ASM volatile(
        "rsub8 %0, %1, %2"
        : "=r"(halved)
        : "r"(a), "r"(b)
    );
    return halved;
}
/* ===== Inline Function End for 3.89. RSUB8 ===== */

/* ===== Inline Function Start for 3.90. RSUB16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief RSUB16 (SIMD 16-bit Signed Halving Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * RSUB16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element subtractions simultaneously. The results are halved to
 * avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit signed integer elements in Rs2 from the 16-bit
 * signed integer elements in Rs1. The results are first arithmetically right-shifted by 1 bit and then
 * written to Rd.
 *
 * **Examples**:\n
 * ~~~
 * * Rs1 = 0x7FFF, Rs2 = 0x8000, Rd = 0x7FFF
 * * Rs1 = 0x8000, Rs2 = 0x7FFF, Rd = 0x8000
 * * Rs1 = 0x8000, Rs2 = 0x4000, Rd = 0xA000
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = (Rs1.H[x] - Rs2.H[x]) s>> 1;
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_RSUB16(unsigned long a, unsigned long b)
{
    /* Destination (Rd) of the instruction; a is bound to Rs1, b to Rs2. */
    unsigned long rd;

    /* Per-halfword signed subtraction a - b, each lane halved by the instruction. */
    __ASM volatile(
        "rsub16 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );

    return rd;
}
/* ===== Inline Function End for 3.90. RSUB16 ===== */

/* ===== Inline Function Start for 3.91. RSUB64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
 * \brief RSUB64 (64-bit Signed Halving Subtraction)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * RSUB64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Perform a 64-bit signed integer subtraction. The result is halved to avoid overflow or
 * saturation.
 *
 * **RV32 Description**:\n
 * This instruction subtracts the 64-bit signed integer of an even/odd pair of
 * registers specified by Rs2(4,1) from the 64-bit signed integer of an even/odd pair of registers
 * specified by Rs1(4,1). The subtraction result is first arithmetically right-shifted by 1 bit and then
 * written to an even/odd pair of registers specified by Rd(4,1).
 * Rx(4,1), i.e., value d, determines the even/odd pair group of two registers. Specifically, the register
 * pair includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
 * of the pair contains the low 32-bit of the result.
 *
 * **RV64 Description**:\n
 * This instruction subtracts the 64-bit signed integer in Rs2 from the 64-bit signed
 * integer in Rs1. The 64-bit subtraction result is first arithmetically right-shifted by 1 bit and then
 * written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * RV32:
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
 * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
 * R[t_H].R[t_L] = (R[a_H].R[a_L] - R[b_H].R[b_L]) s>> 1;
 * RV64:
 * Rd = (Rs1 - Rs2) s>> 1;
 * ~~~
 *
 * \param [in]  a    long long type of value stored in a
 * \param [in]  b    long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_RSUB64(long long a, long long b)
{
    /* 64-bit destination operand; a is the minuend (Rs1), b the subtrahend (Rs2). */
    long long rd;

    /* 64-bit signed subtraction a - b, halved by the instruction. */
    __ASM volatile(
        "rsub64 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );

    return rd;
}
/* ===== Inline Function End for 3.91. RSUB64 ===== */

/* ===== Inline Function Start for 3.92. RSUBW ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
 * \brief RSUBW (32-bit Signed Halving Subtraction)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * RSUBW Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Subtract 32-bit signed integers and the result is halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the first 32-bit signed integer in Rs2 from the first 32-bit
 * signed integer in Rs1. The result is first arithmetically right-shifted by 1 bit and then sign-extended
 * and written to Rd.
 *
 * **Examples**:\n
 * ~~~
 * * Rs1 = 0x7FFFFFFF, Rs2 = 0x80000000, Rd = 0x7FFFFFFF
 * * Rs1 = 0x80000000, Rs2 = 0x7FFFFFFF, Rd = 0x80000000
 * * Rs1 = 0x80000000, Rs2 = 0x40000000, Rd = 0xA0000000
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * RV32:
 * Rd[31:0] = (Rs1[31:0] - Rs2[31:0]) s>> 1;
 * RV64:
 * resw[31:0] = (Rs1[31:0] - Rs2[31:0]) s>> 1;
 * Rd[63:0] = SE(resw[31:0]);
 * ~~~
 *
 * \param [in]  a    int type of value stored in a
 * \param [in]  b    int type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_RSUBW(int a, int b)
{
    /* Register-wide destination (Rd); a is bound to Rs1, b to Rs2. */
    long rd;

    /* 32-bit signed subtraction a - b, halved by the instruction. */
    __ASM volatile(
        "rsubw %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );

    return rd;
}
/* ===== Inline Function End for 3.92. RSUBW ===== */

/* ===== Inline Function Start for 3.93. SCLIP8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
 * \brief SCLIP8 (SIMD 8-bit Signed Clip Value)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SCLIP8 Rd, Rs1, imm3u[2:0]
 * ~~~
 *
 * **Purpose**:\n
 * Limit the 8-bit signed integer elements of a register into a signed range simultaneously.
 *
 * **Description**:\n
 * This instruction limits the 8-bit signed integer elements stored in Rs1 into a signed
 * integer range between 2^imm3u-1 and -2^imm3u, and writes the limited results to Rd. For example, if
 * imm3u is 3, the 8-bit input values should be saturated between 7 and -8. If saturation is performed,
 * set OV bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * src = Rs1.B[x];
 * if (src > (2^imm3u)-1) {
 *   src = (2^imm3u)-1;
 *   OV = 1;
 * } else if (src < -2^imm3u) {
 *   src = -2^imm3u;
 *   OV = 1;
 * }
 * Rd.B[x] = src
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
#define __RV_SCLIP8(a, b)    \
    ({    \
        /* The argument is evaluated before the output temporary exists, and */    \
        /* both locals carry macro-specific names, so a caller expression    */    \
        /* that mentions `result` or `__a` is not captured by this block.    */    \
        unsigned long __rv_sclip8_in = (unsigned long)(a);    \
        unsigned long __rv_sclip8_out;    \
        /* b is passed with the "K" constraint: it must be a constant immediate (imm3u). */    \
        __ASM volatile("sclip8 %0, %1, %2" : "=r"(__rv_sclip8_out) : "r"(__rv_sclip8_in), "K"(b));    \
        __rv_sclip8_out;    \
    })
/* ===== Inline Function End for 3.93. SCLIP8 ===== */

/* ===== Inline Function Start for 3.94. SCLIP16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
 * \brief SCLIP16 (SIMD 16-bit Signed Clip Value)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SCLIP16 Rd, Rs1, imm4u[3:0]
 * ~~~
 *
 * **Purpose**:\n
 * Limit the 16-bit signed integer elements of a register into a signed range simultaneously.
 *
 * **Description**:\n
 * This instruction limits the 16-bit signed integer elements stored in Rs1 into a signed
 * integer range between 2^imm4u-1 and -2^imm4u, and writes the limited results to Rd. For example, if
 * imm4u is 3, the 16-bit input values should be saturated between 7 and -8. If saturation is performed,
 * set OV bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * src = Rs1.H[x];
 * if (src > (2^imm4u)-1) {
 *   src = (2^imm4u)-1;
 *   OV = 1;
 * } else if (src < -2^imm4u) {
 *   src = -2^imm4u;
 *   OV = 1;
 * }
 * Rd.H[x] = src
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
#define __RV_SCLIP16(a, b)    \
    ({    \
        /* The argument is evaluated before the output temporary exists, and */    \
        /* both locals carry macro-specific names, so a caller expression    */    \
        /* that mentions `result` or `__a` is not captured by this block.    */    \
        unsigned long __rv_sclip16_in = (unsigned long)(a);    \
        unsigned long __rv_sclip16_out;    \
        /* b is passed with the "K" constraint: it must be a constant immediate (imm4u). */    \
        __ASM volatile("sclip16 %0, %1, %2" : "=r"(__rv_sclip16_out) : "r"(__rv_sclip16_in), "K"(b));    \
        __rv_sclip16_out;    \
    })
/* ===== Inline Function End for 3.94. SCLIP16 ===== */

/* ===== Inline Function Start for 3.95. SCLIP32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
 * \brief SCLIP32 (SIMD 32-bit Signed Clip Value)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * SCLIP32 Rd, Rs1, imm5u[4:0]
 * ~~~
 *
 * **Purpose**:\n
 * Limit the 32-bit signed integer elements of a register into a signed range simultaneously.
 *
 * **Description**:\n
 * This instruction limits the 32-bit signed integer elements stored in Rs1 into a signed
 * integer range between 2^imm5u-1 and -2^imm5u, and writes the limited results to Rd. For example, if
 * imm5u is 3, the 32-bit input values should be saturated between 7 and -8. If saturation is performed,
 * set OV bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * src = Rs1.W[x];
 * if (src > (2^imm5u)-1) {
 *   src = (2^imm5u)-1;
 *   OV = 1;
 * } else if (src < -2^imm5u) {
 *   src = -2^imm5u;
 *   OV = 1;
 * }
 * Rd.W[x] = src
 * for RV32: x=0,
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in long type
 */
#define __RV_SCLIP32(a, b)    \
    ({    \
        /* The argument is evaluated before the output temporary exists, and */    \
        /* both locals carry macro-specific names, so a caller expression    */    \
        /* that mentions `result` or `__a` is not captured by this block.    */    \
        long __rv_sclip32_in = (long)(a);    \
        long __rv_sclip32_out;    \
        /* b is passed with the "K" constraint: it must be a constant immediate (imm5u). */    \
        __ASM volatile("sclip32 %0, %1, %2" : "=r"(__rv_sclip32_out) : "r"(__rv_sclip32_in), "K"(b));    \
        __rv_sclip32_out;    \
    })
/* ===== Inline Function End for 3.95. SCLIP32 ===== */

/* ===== Inline Function Start for 3.96. SCMPLE8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP
 * \brief SCMPLE8 (SIMD 8-bit Signed Compare Less Than & Equal)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SCMPLE8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit signed integer elements less than & equal comparisons simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 8-bit signed integer elements in Rs1 with the 8-bit
 * signed integer elements in Rs2 to see if the one in Rs1 is less than or equal to the one in Rs2. If it is
 * true, the result is 0xFF; otherwise, the result is 0x0. The element comparison results are written to
 * Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.B[x] = (Rs1.B[x] {le} Rs2.B[x])? 0xff : 0x0;
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SCMPLE8(unsigned long a, unsigned long b)
{
    /* Per-byte comparison mask produced by the instruction (Rd). */
    unsigned long mask;

    /* Signed a <= b on every byte lane; a is bound to Rs1, b to Rs2. */
    __ASM volatile(
        "scmple8 %0, %1, %2"
        : "=r"(mask)
        : "r"(a), "r"(b)
    );

    return mask;
}
/* ===== Inline Function End for 3.96. SCMPLE8 ===== */

/* ===== Inline Function Start for 3.97. SCMPLE16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP
 * \brief SCMPLE16 (SIMD 16-bit Signed Compare Less Than & Equal)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SCMPLE16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer elements less than & equal comparisons simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 16-bit signed integer elements in Rs1 with the 16-bit
 * signed integer elements in Rs2 to see if the one in Rs1 is less than or equal to the one in Rs2. If it is
 * true, the result is 0xFFFF; otherwise, the result is 0x0. The element comparison results are written
 * to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = (Rs1.H[x] {le} Rs2.H[x])? 0xffff : 0x0;
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SCMPLE16(unsigned long a, unsigned long b)
{
    /* Per-halfword comparison mask produced by the instruction (Rd). */
    unsigned long mask;

    /* Signed a <= b on every halfword lane; a is bound to Rs1, b to Rs2. */
    __ASM volatile(
        "scmple16 %0, %1, %2"
        : "=r"(mask)
        : "r"(a), "r"(b)
    );

    return mask;
}
/* ===== Inline Function End for 3.97. SCMPLE16 ===== */

/* ===== Inline Function Start for 3.98. SCMPLT8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP
 * \brief SCMPLT8 (SIMD 8-bit Signed Compare Less Than)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SCMPLT8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit signed integer elements less than comparisons simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 8-bit signed integer elements in Rs1 with the 8-bit
 * signed integer elements in Rs2 to see if the one in Rs1 is less than the one in Rs2. If it is true, the
 * result is 0xFF; otherwise, the result is 0x0. The element comparison results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.B[x] = (Rs1.B[x] < Rs2.B[x])? 0xff : 0x0;
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SCMPLT8(unsigned long a, unsigned long b)
{
    /* Per-byte comparison mask produced by the instruction (Rd). */
    unsigned long mask;

    /* Signed a < b on every byte lane; a is bound to Rs1, b to Rs2. */
    __ASM volatile(
        "scmplt8 %0, %1, %2"
        : "=r"(mask)
        : "r"(a), "r"(b)
    );

    return mask;
}
/* ===== Inline Function End for 3.98. SCMPLT8 ===== */

/* ===== Inline Function Start for 3.99. SCMPLT16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP
 * \brief SCMPLT16 (SIMD 16-bit Signed Compare Less Than)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SCMPLT16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer elements less than comparisons simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 16-bit signed integer elements in Rs1 with the 16-bit
 * signed integer elements in Rs2 to see if the one in Rs1 is less than the one in Rs2. If it is true, the
 * result is 0xFFFF; otherwise, the result is 0x0. The element comparison results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = (Rs1.H[x] < Rs2.H[x])? 0xffff : 0x0;
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SCMPLT16(unsigned long a, unsigned long b)
{
    /* Per-halfword comparison mask produced by the instruction (Rd). */
    unsigned long mask;

    /* Signed a < b on every halfword lane; a is bound to Rs1, b to Rs2. */
    __ASM volatile(
        "scmplt16 %0, %1, %2"
        : "=r"(mask)
        : "r"(a), "r"(b)
    );

    return mask;
}
/* ===== Inline Function End for 3.99. SCMPLT16 ===== */

/* ===== Inline Function Start for 3.100. SLL8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
 * \brief SLL8 (SIMD 8-bit Shift Left Logical)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SLL8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit elements logical left shift operations simultaneously. The shift amount is a
 * variable from a GPR.
 *
 * **Description**:\n
 * The 8-bit elements in Rs1 are left-shifted logically. And the results are written to Rd.
 * The shifted out bits are filled with zero and the shift amount is specified by the low-order 3-bits of
 * the value in the Rs2 register.
 *
 * **Operations**:\n
 * ~~~
 * sa = Rs2[2:0];
 * Rd.B[x] = Rs1.B[x] << sa;
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SLL8(unsigned long a, unsigned int b)
{
    /* Shifted lanes written by the instruction (Rd). */
    unsigned long rd;

    /* Logical left shift of every byte lane of a; the shift amount comes from register operand b. */
    __ASM volatile(
        "sll8 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );

    return rd;
}
/* ===== Inline Function End for 3.100. SLL8 ===== */

/* ===== Inline Function Start for 3.101. SLLI8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
 * \brief SLLI8 (SIMD 8-bit Shift Left Logical Immediate)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SLLI8 Rd, Rs1, imm3u
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit elements logical left shift operations simultaneously. The shift amount is an
 * immediate value.
 *
 * **Description**:\n
 * The 8-bit elements in Rs1 are left-shifted logically. And the results are written to Rd.
 * The shifted out bits are filled with zero and the shift amount is specified by the imm3u constant.
 *
 * **Operations**:\n
 * ~~~
 * sa = imm3u[2:0];
 * Rd.B[x] = Rs1.B[x] << sa;
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
#define __RV_SLLI8(a, b)    \
    ({    \
        /* The argument is evaluated before the output temporary exists, and */    \
        /* both locals carry macro-specific names, so a caller expression    */    \
        /* that mentions `result` or `__a` is not captured by this block.    */    \
        unsigned long __rv_slli8_in = (unsigned long)(a);    \
        unsigned long __rv_slli8_out;    \
        /* b is passed with the "K" constraint: it must be a constant immediate (imm3u). */    \
        __ASM volatile("slli8 %0, %1, %2" : "=r"(__rv_slli8_out) : "r"(__rv_slli8_in), "K"(b));    \
        __rv_slli8_out;    \
    })
/* ===== Inline Function End for 3.101. SLLI8 ===== */

/* ===== Inline Function Start for 3.102. SLL16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
 * \brief SLL16 (SIMD 16-bit Shift Left Logical)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SLL16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit elements logical left shift operations simultaneously. The shift amount is a
 * variable from a GPR.
 *
 * **Description**:\n
 * The 16-bit elements in Rs1 are left-shifted logically. And the results are written to Rd.
 * The shifted out bits are filled with zero and the shift amount is specified by the low-order 4-bits of
 * the value in the Rs2 register.
 *
 * **Operations**:\n
 * ~~~
 * sa = Rs2[3:0];
 * Rd.H[x] = Rs1.H[x] << sa;
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SLL16(unsigned long a, unsigned int b)
{
    /* Shifted lanes written by the instruction (Rd). */
    unsigned long rd;

    /* Logical left shift of every halfword lane of a; the shift amount comes from register operand b. */
    __ASM volatile(
        "sll16 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );

    return rd;
}
/* ===== Inline Function End for 3.102. SLL16 ===== */

/* ===== Inline Function Start for 3.103. SLLI16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
 * \brief SLLI16 (SIMD 16-bit Shift Left Logical Immediate)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SLLI16 Rd, Rs1, imm4[3:0]
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit element logical left shift operations simultaneously. The shift amount is an
 * immediate value.
 *
 * **Description**:\n
 * The 16-bit elements in Rs1 are left-shifted logically. The shifted out bits are filled with
 * zero and the shift amount is specified by the imm4[3:0] constant. And the results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = imm4[3:0];
 * Rd.H[x] = Rs1.H[x] << sa;
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
#define __RV_SLLI16(a, b)    \
    ({    \
        /* The argument is evaluated before the output temporary exists, and */    \
        /* both locals carry macro-specific names, so a caller expression    */    \
        /* that mentions `result` or `__a` is not captured by this block.    */    \
        unsigned long __rv_slli16_in = (unsigned long)(a);    \
        unsigned long __rv_slli16_out;    \
        /* b is passed with the "K" constraint: it must be a constant immediate (imm4). */    \
        __ASM volatile("slli16 %0, %1, %2" : "=r"(__rv_slli16_out) : "r"(__rv_slli16_in), "K"(b));    \
        __rv_slli16_out;    \
    })
/* ===== Inline Function End for 3.103. SLLI16 ===== */

/* ===== Inline Function Start for 3.104. SMAL ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
 * \brief SMAL (Signed Multiply Halfs & Add 64-bit)
 * \details
 * **Type**: Partial-SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMAL Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed bottom 16-bit content of the 32-bit elements of a register with the top
 * 16-bit content of the same 32-bit elements of the same register, and add the results with a 64-bit
 * value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is written back
 * to another even/odd pair of registers (RV32) or a register (RV64).
 *
 * **RV32 Description**:\n
 * This instruction multiplies the bottom 16-bit content of the lower 32-bit of Rs2 with the top 16-bit
 * content of the lower 32-bit of Rs2 and adds the result with the 64-bit value of an even/odd pair of
 * registers specified by Rs1(4,1). The 64-bit addition result is written back to an even/odd pair of
 * registers specified by Rd(4,1). The 16-bit values of Rs2, and the 64-bit value of the Rs1(4,1) register-
 * pair are treated as signed integers.
 * Rx(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
 * register of the pair contains the low 32-bit of the operand.
 *
 * **RV64 Description**:\n
 * This instruction multiplies the bottom 16-bit content of the 32-bit elements of Rs2 with the top 16-bit
 * content of the same 32-bit elements of Rs2 and adds the results with the 64-bit value of Rs1. The 64-
 * bit addition result is written back to Rd. The 16-bit values of Rs2, and the 64-bit value of Rs1 are
 * treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * RV32:
 * Mres[31:0] = Rs2.H[1] * Rs2.H[0];
 * Idx0 = CONCAT(Rs1(4,1),1'b0); Idx1 = CONCAT(Rs1(4,1),1'b1);
 * Idx2 = CONCAT(Rd(4,1),1'b0); Idx3 = CONCAT(Rd(4,1),1'b1);
 * R[Idx3].R[Idx2] = R[Idx1].R[Idx0] + SE64(Mres[31:0]);
 * RV64:
 * Mres[0][31:0] = Rs2.W[0].H[1] * Rs2.W[0].H[0];
 * Mres[1][31:0] = Rs2.W[1].H[1] * Rs2.W[1].H[0];
 * Rd = Rs1 + SE64(Mres[1][31:0]) + SE64(Mres[0][31:0]);
 * ~~~
 *
 * \param [in]  a    long long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_SMAL(long long a, unsigned long b)
{
    /* 64-bit sum written by the instruction (Rd). */
    long long sum;

    /* a is the 64-bit addend (Rs1); b supplies the 16-bit halves that are multiplied (Rs2). */
    __ASM volatile(
        "smal %0, %1, %2"
        : "=r"(sum)
        : "r"(a), "r"(b)
    );

    return sum;
}
/* ===== Inline Function End for 3.104. SMAL ===== */

/* ===== Inline Function Start for 3.105.1. SMALBB ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
 * \brief SMALBB (Signed Multiply Bottom Halfs & Add 64-bit)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * SMALBB Rd, Rs1, Rs2
 * SMALBT Rd, Rs1, Rs2
 * SMALTT Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 16-bit content of the 32-bit elements of a register with the 16-bit
 * content of the corresponding 32-bit elements of another register and add the results with a 64-bit
 * value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is written back
 * to the register-pair (RV32) or the register (RV64).
 * * SMALBB: rt pair + bottom*bottom (all 32-bit elements)
 * * SMALBT: rt pair + bottom*top (all 32-bit elements)
 * * SMALTT: rt pair + top*top (all 32-bit elements)
 *
 * **RV32 Description**:\n
 * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
 * content of Rs2.
 * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit
 * content of Rs2.
 * For the `SMALTT` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content
 * of Rs2.
 * The multiplication result is added with the 64-bit value of an even/odd pair of registers specified by
 * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and
 * Rs2, and the 64-bit value of the register-pair are treated as signed integers.
 * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
 * register of the pair contains the low 32-bit of the operand.
 *
 * **RV64 Description**:\n
 * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2.
 * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the top 16-bit content of the 32-bit elements of Rs2.
 * For the `SMALTT` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
 * the top 16-bit content of the 32-bit elements of Rs2.
 * The multiplication results are added with the 64-bit value of Rd. The 64-bit addition result is written
 * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed
 * integers.
 *
 * **Operations**:\n
 * ~~~
 * RV32:
 * Mres[31:0] = Rs1.H[0] * Rs2.H[0]; // SMALBB
 * Mres[31:0] = Rs1.H[0] * Rs2.H[1]; // SMALBT
 * Mres[31:0] = Rs1.H[1] * Rs2.H[1]; // SMALTT
 * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
 * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]);
 * RV64:
 * // SMALBB
 * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[0];
 * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[0];
 * // SMALBT
 * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[1];
 * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[1];
 * // SMALTT
 * Mres[0][31:0] = Rs1.W[0].H[1] * Rs2.W[0].H[1];
 * Mres[1][31:0] = Rs1.W[1].H[1] * Rs2.W[1].H[1];
 * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
 * ~~~
 *
 * \param [in]  t    long long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_SMALBB(long long t, unsigned long a, unsigned long b)
{
    /* Read-write operand ("+r"): enters holding the 64-bit addend t, leaves holding the sum. */
    long long acc = t;

    /* a is bound to Rs1 and b to Rs2; their bottom halfwords are multiplied and accumulated. */
    __ASM volatile(
        "smalbb %0, %1, %2"
        : "+r"(acc)
        : "r"(a), "r"(b)
    );

    return acc;
}
/* ===== Inline Function End for 3.105.1. SMALBB ===== */

/* ===== Inline Function Start for 3.105.2. SMALBT ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
 * \brief SMALBT (Signed Multiply Bottom Half & Top Half & Add 64-bit)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * SMALBB Rd, Rs1, Rs2
 * SMALBT Rd, Rs1, Rs2
 * SMALTT Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 16-bit content of the 32-bit elements of a register with the 16-bit
 * content of the corresponding 32-bit elements of another register and add the results with a 64-bit
 * value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is written back
 * to the register-pair (RV32) or the register (RV64).
 * * SMALBB: rt pair + bottom*bottom (all 32-bit elements)
 * * SMALBT: rt pair + bottom*top (all 32-bit elements)
 * * SMALTT: rt pair + top*top (all 32-bit elements)
 *
 * **RV32 Description**:\n
 * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
 * content of Rs2.
 * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit
 * content of Rs2.
 * For the `SMALTT` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content
 * of Rs2.
 * The multiplication result is added with the 64-bit value of an even/odd pair of registers specified by
 * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and
 * Rs2, and the 64-bit value of the register-pair are treated as signed integers.
 * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
 * register of the pair contains the low 32-bit of the operand.
 *
 * **RV64 Description**:\n
 * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2.
 * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the top 16-bit content of the 32-bit elements of Rs2.
 * For the `SMALTT` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
 * the top 16-bit content of the 32-bit elements of Rs2.
 * The multiplication results are added with the 64-bit value of Rd. The 64-bit addition result is written
 * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed
 * integers.
 *
 * **Operations**:\n
 * ~~~
 * RV32:
 * Mres[31:0] = Rs1.H[0] * Rs2.H[0]; // SMALBB
 * Mres[31:0] = Rs1.H[0] * Rs2.H[1]; // SMALBT
 * Mres[31:0] = Rs1.H[1] * Rs2.H[1]; // SMALTT
 * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
 * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]);
 * RV64:
 * // SMALBB
 * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[0];
 * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[0];
 * // SMALBT
 * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[1];
 * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[1];
 * // SMALTT
 * Mres[0][31:0] = Rs1.W[0].H[1] * Rs2.W[0].H[1];
 * Mres[1][31:0] = Rs1.W[1].H[1] * Rs2.W[1].H[1];
 * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
 * ~~~
 *
 * \param [in]  t    long long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_SMALBT(long long t, unsigned long a, unsigned long b)
{
    /* Read-write operand ("+r"): enters holding the 64-bit addend t, leaves holding the sum. */
    long long acc = t;

    /* a is bound to Rs1 (bottom halfwords used) and b to Rs2 (top halfwords used). */
    __ASM volatile(
        "smalbt %0, %1, %2"
        : "+r"(acc)
        : "r"(a), "r"(b)
    );

    return acc;
}
/* ===== Inline Function End for 3.105.2. SMALBT ===== */

/* ===== Inline Function Start for 3.105.3. SMALTT ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
 * \brief SMALTT (Signed Multiply Top Halfs & Add 64-bit)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * SMALBB Rd, Rs1, Rs2
 * SMALBT Rd, Rs1, Rs2
 * SMALTT Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 16-bit content of the 32-bit elements of a register with the 16-bit
 * content of the corresponding 32-bit elements of another register and add the results with a 64-bit
 * value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is written back
 * to the register-pair (RV32) or the register (RV64).
 * * SMALBB: rt pair + bottom*bottom (all 32-bit elements)
 * * SMALBT: rt pair + bottom*top (all 32-bit elements)
 * * SMALTT: rt pair + top*top (all 32-bit elements)
 *
 * **RV32 Description**:\n
 * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
 * content of Rs2.
 * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit
 * content of Rs2.
 * For the `SMALTT` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content
 * of Rs2.
 * The multiplication result is added with the 64-bit value of an even/odd pair of registers specified by
 * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and
 * Rs2, and the 64-bit value of the register-pair are treated as signed integers.
 * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
 * register of the pair contains the low 32-bit of the operand.
 *
 * **RV64 Description**:\n
 * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2.
 * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the top 16-bit content of the 32-bit elements of Rs2.
 * For the `SMALTT` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
 * the top 16-bit content of the 32-bit elements of Rs2.
 * The multiplication results are added with the 64-bit value of Rd. The 64-bit addition result is written
 * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed
 * integers.
 *
 * **Operations**:\n
 * ~~~
 * RV32:
 * Mres[31:0] = Rs1.H[0] * Rs2.H[0]; // SMALBB
 * Mres[31:0] = Rs1.H[0] * Rs2.H[1]; // SMALBT
 * Mres[31:0] = Rs1.H[1] * Rs2.H[1]; // SMALTT
 * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
 * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]);
 * RV64:
 * // SMALBB
 * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[0];
 * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[0];
 * // SMALBT
 * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[1];
 * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[1];
 * // SMALTT
 * Mres[0][31:0] = Rs1.W[0].H[1] * Rs2.W[0].H[1];
 * Mres[1][31:0] = Rs1.W[1].H[1] * Rs2.W[1].H[1];
 * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
 * ~~~
 *
 * \param [in]  t    long long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_SMALTT(long long t, unsigned long a, unsigned long b)
{
    /* Read-write operand ("+r"): carries t into the instruction and the sum back out. */
    long long acc = t;

    __ASM volatile("smaltt %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.105.3. SMALTT ===== */

/* ===== Inline Function Start for 3.106.1. SMALDA ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
 * \brief SMALDA (Signed Multiply Two Halfs and Two Adds 64-bit)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * SMALDA Rd, Rs1, Rs2
 * SMALXDA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
 * adds the two 32-bit results and the 64-bit value of an even/odd pair of registers together.
 * * SMALDA: rt pair+ top*top + bottom*bottom (all 32-bit elements)
 * * SMALXDA: rt pair+ top*bottom + bottom*top (all 32-bit elements)
 *
 * **RV32 Description**:\n
 * For the `SMALDA` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
 * content of Rs2 and then adds the result to the result of multiplying the top 16-bit content of Rs1 with
 * the top 16-bit content of Rs2 with unlimited precision.
 * For the `SMALXDA` instruction, it multiplies the top 16-bit content of Rs1 with the bottom 16-bit
 * content of Rs2 and then adds the result to the result of multiplying the bottom 16-bit content of Rs1
 * with the top 16-bit content of Rs2 with unlimited precision.
 * The result is added to the 64-bit value of an even/odd pair of registers specified by Rd(4,1). The 64-
 * bit addition result is written back to the register-pair. The 16-bit values of Rs1 and Rs2, and the 64-
 * bit value of the register-pair are treated as signed integers.
 * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
 * register of the pair contains the low 32-bit of the operand.
 *
 * **RV64 Description**:\n
 * For the `SMALDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
 * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-
 * bit elements of Rs2 with unlimited precision.
 * For the `SMALXDA` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
 * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the
 * 32-bit elements of Rs2 with unlimited precision.
 * The results are added to the 64-bit value of Rd. The 64-bit addition result is written back to Rd. The
 * 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * RV32:
 * // SMALDA
 * Mres0[31:0] = (Rs1.H[0] * Rs2.H[0]);
 * Mres1[31:0] = (Rs1.H[1] * Rs2.H[1]);
 * // SMALXDA
 * Mres0[31:0] = (Rs1.H[0] * Rs2.H[1]);
 * Mres1[31:0] = (Rs1.H[1] * Rs2.H[0]);
 * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
 * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres0[31:0]) + SE64(Mres1[31:0]);
 * RV64:
 * // SMALDA
 * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
 * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
 * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
 * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
 * // SMALXDA
 * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
 * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]);
 * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
 * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]);
 * Rd = Rd + SE64(Mres0[0][31:0]) + SE64(Mres1[0][31:0]) + SE64(Mres0[1][31:0]) +
 * SE64(Mres1[1][31:0]);
 * ~~~
 *
 * \param [in]  t    long long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_SMALDA(long long t, unsigned long a, unsigned long b)
{
    /* Read-write operand ("+r"): carries t into the instruction and the sum back out. */
    long long acc = t;

    __ASM volatile("smalda %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.106.1. SMALDA ===== */

/* ===== Inline Function Start for 3.106.2. SMALXDA ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
 * \brief SMALXDA (Signed Crossed Multiply Two Halfs and Two Adds 64-bit)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * SMALDA Rd, Rs1, Rs2
 * SMALXDA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
 * adds the two 32-bit results and the 64-bit value of an even/odd pair of registers together.
 * * SMALDA: rt pair+ top*top + bottom*bottom (all 32-bit elements)
 * * SMALXDA: rt pair+ top*bottom + bottom*top (all 32-bit elements)
 *
 * **RV32 Description**:\n
 * For the `SMALDA` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
 * content of Rs2 and then adds the result to the result of multiplying the top 16-bit content of Rs1 with
 * the top 16-bit content of Rs2 with unlimited precision.
 * For the `SMALXDA` instruction, it multiplies the top 16-bit content of Rs1 with the bottom 16-bit
 * content of Rs2 and then adds the result to the result of multiplying the bottom 16-bit content of Rs1
 * with the top 16-bit content of Rs2 with unlimited precision.
 * The result is added to the 64-bit value of an even/odd pair of registers specified by Rd(4,1). The 64-
 * bit addition result is written back to the register-pair. The 16-bit values of Rs1 and Rs2, and the 64-
 * bit value of the register-pair are treated as signed integers.
 * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
 * register of the pair contains the low 32-bit of the operand.
 *
 * **RV64 Description**:\n
 * For the `SMALDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
 * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-
 * bit elements of Rs2 with unlimited precision.
 * For the `SMALXDA` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
 * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the
 * 32-bit elements of Rs2 with unlimited precision.
 * The results are added to the 64-bit value of Rd. The 64-bit addition result is written back to Rd. The
 * 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * RV32:
 * // SMALDA
 * Mres0[31:0] = (Rs1.H[0] * Rs2.H[0]);
 * Mres1[31:0] = (Rs1.H[1] * Rs2.H[1]);
 * // SMALXDA
 * Mres0[31:0] = (Rs1.H[0] * Rs2.H[1]);
 * Mres1[31:0] = (Rs1.H[1] * Rs2.H[0]);
 * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
 * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres0[31:0]) + SE64(Mres1[31:0]);
 * RV64:
 * // SMALDA
 * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
 * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
 * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
 * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
 * // SMALXDA
 * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
 * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]);
 * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
 * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]);
 * Rd = Rd + SE64(Mres0[0][31:0]) + SE64(Mres1[0][31:0]) + SE64(Mres0[1][31:0]) +
 * SE64(Mres1[1][31:0]);
 * ~~~
 *
 * \param [in]  t    long long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_SMALXDA(long long t, unsigned long a, unsigned long b)
{
    /* Read-write operand ("+r"): carries t into the instruction and the sum back out. */
    long long acc = t;

    __ASM volatile("smalxda %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.106.2. SMALXDA ===== */

/* ===== Inline Function Start for 3.107.1. SMALDS ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
 * \brief SMALDS (Signed Multiply Two Halfs & Subtract & Add 64-bit)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * SMALDS Rd, Rs1, Rs2
 * SMALDRS Rd, Rs1, Rs2
 * SMALXDS Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
 * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to
 * the 64-bit value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is
 * written back to the register-pair.
 * * SMALDS: rt pair + (top*top - bottom*bottom) (all 32-bit elements)
 * * SMALDRS: rt pair + (bottom*bottom - top*top) (all 32-bit elements)
 * * SMALXDS: rt pair + (top*bottom - bottom*top) (all 32-bit elements)
 *
 * **RV32 Description**:\n
 * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
 * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
 * Rs1 with the top 16-bit content of Rs2.
 * For the `SMALDRS` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content
 * of Rs2 and then subtracts the result from the result of multiplying the bottom 16-bit content of Rs1
 * with the bottom 16-bit content of Rs2.
 * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit
 * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
 * Rs1 with the bottom 16-bit content of Rs2.
 * The subtraction result is then added to the 64-bit value of an even/odd pair of registers specified by
 * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and
 * Rs2, and the 64-bit value of the register-pair are treated as signed integers.
 * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
 * register of the pair contains the low 32-bit of the operand.
 *
 * **RV64 Description**:\n
 * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
 * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content
 * of the 32-bit elements of Rs2.
 * For the `SMALDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
 * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of
 * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of
 * the 32-bit elements of Rs2.
 * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
 * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit
 * content of the 32-bit elements of Rs2.
 * The subtraction results are then added to the 64-bit value of Rd. The 64-bit addition result is written
 * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed
 * integers.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * Mres[31:0] = (Rs1.H[1] * Rs2.H[1]) - (Rs1.H[0] * Rs2.H[0]); // SMALDS
 * Mres[31:0] = (Rs1.H[0] * Rs2.H[0]) - (Rs1.H[1] * Rs2.H[1]); // SMALDRS
 * Mres[31:0] = (Rs1.H[1] * Rs2.H[0]) - (Rs1.H[0] * Rs2.H[1]); // SMALXDS
 * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
 * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]);
 * * RV64:
 * // SMALDS
 * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]) - (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
 * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]) - (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
 * // SMALDRS
 * Mres[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]) - (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
 * Mres[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]) - (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
 * // SMALXDS
 * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]) - (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
 * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]) - (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
 * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
 * ~~~
 *
 * \param [in]  t    long long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_SMALDS(long long t, unsigned long a, unsigned long b)
{
    /* Read-write operand ("+r"): carries t into the instruction and the result back out. */
    long long acc = t;

    __ASM volatile("smalds %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.107.1. SMALDS ===== */

/* ===== Inline Function Start for 3.107.2. SMALDRS ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
 * \brief SMALDRS (Signed Multiply Two Halfs & Reverse Subtract & Add 64-bit)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * SMALDS Rd, Rs1, Rs2
 * SMALDRS Rd, Rs1, Rs2
 * SMALXDS Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
 * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to
 * the 64-bit value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is
 * written back to the register-pair.
 * * SMALDS: rt pair + (top*top - bottom*bottom) (all 32-bit elements)
 * * SMALDRS: rt pair + (bottom*bottom - top*top) (all 32-bit elements)
 * * SMALXDS: rt pair + (top*bottom - bottom*top) (all 32-bit elements)
 *
 * **RV32 Description**:\n
 * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
 * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
 * Rs1 with the top 16-bit content of Rs2.
 * For the `SMALDRS` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content
 * of Rs2 and then subtracts the result from the result of multiplying the bottom 16-bit content of Rs1
 * with the bottom 16-bit content of Rs2.
 * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit
 * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
 * Rs1 with the bottom 16-bit content of Rs2.
 * The subtraction result is then added to the 64-bit value of an even/odd pair of registers specified by
 * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and
 * Rs2, and the 64-bit value of the register-pair are treated as signed integers.
 * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
 * register of the pair contains the low 32-bit of the operand.
 *
 * **RV64 Description**:\n
 * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
 * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content
 * of the 32-bit elements of Rs2.
 * For the `SMALDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
 * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of
 * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of
 * the 32-bit elements of Rs2.
 * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
 * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit
 * content of the 32-bit elements of Rs2.
 * The subtraction results are then added to the 64-bit value of Rd. The 64-bit addition result is written
 * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed
 * integers.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * Mres[31:0] = (Rs1.H[1] * Rs2.H[1]) - (Rs1.H[0] * Rs2.H[0]); // SMALDS
 * Mres[31:0] = (Rs1.H[0] * Rs2.H[0]) - (Rs1.H[1] * Rs2.H[1]); // SMALDRS
 * Mres[31:0] = (Rs1.H[1] * Rs2.H[0]) - (Rs1.H[0] * Rs2.H[1]); // SMALXDS
 * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
 * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]);
 * * RV64:
 * // SMALDS
 * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]) - (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
 * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]) - (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
 * // SMALDRS
 * Mres[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]) - (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
 * Mres[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]) - (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
 * // SMALXDS
 * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]) - (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
 * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]) - (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
 * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
 * ~~~
 *
 * \param [in]  t    long long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_SMALDRS(long long t, unsigned long a, unsigned long b)
{
    /* Read-write operand ("+r"): carries t into the instruction and the result back out. */
    long long acc = t;

    __ASM volatile("smaldrs %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.107.2. SMALDRS ===== */

/* ===== Inline Function Start for 3.107.3. SMALXDS ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
 * \brief SMALXDS (Signed Crossed Multiply Two Halfs & Subtract & Add 64-bit)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * SMALDS Rd, Rs1, Rs2
 * SMALDRS Rd, Rs1, Rs2
 * SMALXDS Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
 * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to
 * the 64-bit value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is
 * written back to the register-pair.
 * * SMALDS: rt pair + (top*top - bottom*bottom) (all 32-bit elements)
 * * SMALDRS: rt pair + (bottom*bottom - top*top) (all 32-bit elements)
 * * SMALXDS: rt pair + (top*bottom - bottom*top) (all 32-bit elements)
 *
 * **RV32 Description**:\n
 * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
 * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
 * Rs1 with the top 16-bit content of Rs2.
 * For the `SMALDRS` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content
 * of Rs2 and then subtracts the result from the result of multiplying the bottom 16-bit content of Rs1
 * with the bottom 16-bit content of Rs2.
 * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit
 * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
 * Rs1 with the bottom 16-bit content of Rs2.
 * The subtraction result is then added to the 64-bit value of an even/odd pair of registers specified by
 * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and
 * Rs2, and the 64-bit value of the register-pair are treated as signed integers.
 * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
 * register of the pair contains the low 32-bit of the operand.
 *
 * **RV64 Description**:\n
 * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
 * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content
 * of the 32-bit elements of Rs2.
 * For the `SMALDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
 * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of
 * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of
 * the 32-bit elements of Rs2.
 * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
 * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit
 * content of the 32-bit elements of Rs2.
 * The subtraction results are then added to the 64-bit value of Rd. The 64-bit addition result is written
 * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed
 * integers.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * Mres[31:0] = (Rs1.H[1] * Rs2.H[1]) - (Rs1.H[0] * Rs2.H[0]); // SMALDS
 * Mres[31:0] = (Rs1.H[0] * Rs2.H[0]) - (Rs1.H[1] * Rs2.H[1]); // SMALDRS
 * Mres[31:0] = (Rs1.H[1] * Rs2.H[0]) - (Rs1.H[0] * Rs2.H[1]); // SMALXDS
 * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
 * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]);
 * * RV64:
 * // SMALDS
 * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]) - (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
 * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]) - (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
 * // SMALDRS
 * Mres[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]) - (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
 * Mres[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]) - (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
 * // SMALXDS
 * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]) - (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
 * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]) - (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
 * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
 * ~~~
 *
 * \param [in]  t    long long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_SMALXDS(long long t, unsigned long a, unsigned long b)
{
    /* Read-write operand ("+r"): carries t into the instruction and the result back out. */
    long long acc = t;

    __ASM volatile("smalxds %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.107.3. SMALXDS ===== */

/* ===== Inline Function Start for 3.108. SMAR64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
 * \brief SMAR64 (Signed Multiply and Add to 64-Bit Data)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * SMAR64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the 32-bit signed elements in two registers and add the 64-bit multiplication
 * result to the 64-bit signed data of a pair of registers (RV32) or a register (RV64). The result is written
 * back to the pair of registers (RV32) or a register (RV64).
 *
 * **RV32 Description**:\n
 * This instruction multiplies the 32-bit signed data of Rs1 with that of Rs2. It adds
 * the 64-bit multiplication result to the 64-bit signed data of an even/odd pair of registers specified by
 * Rd(4,1). The addition result is written back to the even/odd pair of registers specified by Rd(4,1).
 * Rd(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
 * of the pair contains the low 32-bit of the result.
 *
 * **RV64 Description**:\n
 * This instruction multiplies the 32-bit signed elements of Rs1 with that of Rs2. It
 * adds the 64-bit multiplication results to the 64-bit signed data of Rd. The addition result is written
 * back to Rd.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * R[t_H].R[t_L] = R[t_H].R[t_L] + (Rs1 * Rs2);
 * * RV64:
 * Rd = Rd + (Rs1.W[0] * Rs2.W[0]) + (Rs1.W[1] * Rs2.W[1]);
 * ~~~
 *
 * \param [in]  t    long long type of value stored in t
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_SMAR64(long long t, long a, long b)
{
    /* Read-write operand ("+r"): carries t into the instruction and the sum back out. */
    long long acc = t;

    __ASM volatile("smar64 %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.108. SMAR64 ===== */

/* ===== Inline Function Start for 3.109. SMAQA ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_8B_MULT_32B_ADD
 * \brief SMAQA (Signed Multiply Four Bytes with 32-bit Adds)
 * \details
 * **Type**: Partial-SIMD (Reduction)
 *
 * **Syntax**:\n
 * ~~~
 * SMAQA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do four signed 8-bit multiplications from 32-bit chunks of two registers; and then adds
 * the four 16-bit results and the content of corresponding 32-bit chunks of a third register together.
 *
 * **Description**:\n
 * This instruction multiplies the four signed 8-bit elements of 32-bit chunks of Rs1 with the four
 * signed 8-bit elements of 32-bit chunks of Rs2 and then adds the four results together with the signed
 * content of the corresponding 32-bit chunks of Rd. The final results are written back to the
 * corresponding 32-bit chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rd.W[x] +
 *    (Rs1.W[x].B[3] s* Rs2.W[x].B[3]) + (Rs1.W[x].B[2] s* Rs2.W[x].B[2]) +
 *    (Rs1.W[x].B[1] s* Rs2.W[x].B[1]) + (Rs1.W[x].B[0] s* Rs2.W[x].B[0]);
 * Rd.W[x] = res[x];
 * for RV32: x=0,
 * for RV64: x=1,0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_SMAQA(long t, unsigned long a, unsigned long b)
{
    /* Read-write operand ("+r"): carries t into the instruction and the sum back out. */
    long acc = t;

    __ASM volatile("smaqa %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.109. SMAQA ===== */

/* ===== Inline Function Start for 3.110. SMAQA.SU ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_8B_MULT_32B_ADD
 * \brief SMAQA.SU (Signed and Unsigned Multiply Four Bytes with 32-bit Adds)
 * \details
 * **Type**: Partial-SIMD (Reduction)
 *
 * **Syntax**:\n
 * ~~~
 * SMAQA.SU Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do four `signed x unsigned` 8-bit multiplications from 32-bit chunks of two registers; and
 * then adds the four 16-bit results and the content of corresponding 32-bit chunks of a third register
 * together.
 *
 * **Description**:\n
 * This instruction multiplies the four signed 8-bit elements of 32-bit chunks of Rs1 with the four
 * unsigned 8-bit elements of 32-bit chunks of Rs2 and then adds the four results together with the
 * signed content of the corresponding 32-bit chunks of Rd. The final results are written back to the
 * corresponding 32-bit chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rd.W[x] +
 *    (Rs1.W[x].B[3] su* Rs2.W[x].B[3]) + (Rs1.W[x].B[2] su* Rs2.W[x].B[2]) +
 *    (Rs1.W[x].B[1] su* Rs2.W[x].B[1]) + (Rs1.W[x].B[0] su* Rs2.W[x].B[0]);
 * Rd.W[x] = res[x];
 * for RV32: x=0,
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_SMAQA_SU(long t, unsigned long a, unsigned long b)
{
    /* Read-write operand ("+r"): carries t into the instruction and the sum back out. */
    long acc = t;

    __ASM volatile("smaqa.su %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.110. SMAQA.SU ===== */

/* ===== Inline Function Start for 3.111. SMAX8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
 * \brief SMAX8 (SIMD 8-bit Signed Maximum)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMAX8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit signed integer elements finding maximum operations simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 8-bit signed integer elements in Rs1 with the 8-bit
 * signed integer elements in Rs2 and selects the greater element of each pair. The
 * selected results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.B[x] = (Rs1.B[x] > Rs2.B[x])? Rs1.B[x] : Rs2.B[x];
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SMAX8(unsigned long a, unsigned long b)
{
    unsigned long rd; /* write-only output operand ("=r"), set by the instruction */

    __ASM volatile("smax8 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.111. SMAX8 ===== */

/* ===== Inline Function Start for 3.112. SMAX16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
 * \brief SMAX16 (SIMD 16-bit Signed Maximum)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMAX16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer elements finding maximum operations simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 16-bit signed integer elements in Rs1 with the 16-bit
 * signed integer elements in Rs2 and selects the greater element of each pair. The
 * selected results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = (Rs1.H[x] > Rs2.H[x])? Rs1.H[x] : Rs2.H[x];
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SMAX16(unsigned long a, unsigned long b)
{
    unsigned long rd; /* write-only output operand ("=r"), set by the instruction */

    __ASM volatile("smax16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.112. SMAX16 ===== */

/* ===== Inline Function Start for 3.113.1. SMBB16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
 * \brief SMBB16 (SIMD Signed Multiply Bottom Half & Bottom Half)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMBB16 Rd, Rs1, Rs2
 * SMBT16 Rd, Rs1, Rs2
 * SMTT16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 16-bit content of the 32-bit elements of a register with the signed
 * 16-bit content of the 32-bit elements of another register and write the result to a third register.
 * * SMBB16: W[x].bottom * W[x].bottom
 * * SMBT16: W[x].bottom * W[x].top
 * * SMTT16: W[x].top * W[x].top
 *
 * **Description**:\n
 * For the `SMBB16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2.
 * For the `SMBT16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the top 16-bit content of the 32-bit elements of Rs2.
 * For the `SMTT16` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
 * the top 16-bit content of the 32-bit elements of Rs2.
 * The multiplication results are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed
 * integers.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[0]; // SMBB16
 * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[1]; // SMBT16
 * Rd.W[x] = Rs1.W[x].H[1] * Rs2.W[x].H[1]; // SMTT16
 * for RV32: x=0,
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_SMBB16(unsigned long a, unsigned long b)
{
    /* Signed destination: holds the bottom-half x bottom-half product(s). */
    long rd;

    /* %0 = output register, %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("smbb16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.113.1. SMBB16 ===== */

/* ===== Inline Function Start for 3.113.2. SMBT16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
 * \brief SMBT16 (SIMD Signed Multiply Bottom Half & Top Half)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMBB16 Rd, Rs1, Rs2
 * SMBT16 Rd, Rs1, Rs2
 * SMTT16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 16-bit content of the 32-bit elements of a register with the signed
 * 16-bit content of the 32-bit elements of another register and write the result to a third register.
 * * SMBB16: W[x].bottom * W[x].bottom
 * * SMBT16: W[x].bottom * W[x].top
 * * SMTT16: W[x].top * W[x].top
 *
 * **Description**:\n
 * For the `SMBB16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2.
 * For the `SMBT16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the top 16-bit content of the 32-bit elements of Rs2.
 * For the `SMTT16` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
 * the top 16-bit content of the 32-bit elements of Rs2.
 * The multiplication results are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed
 * integers.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[0]; // SMBB16
 * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[1]; // SMBT16
 * Rd.W[x] = Rs1.W[x].H[1] * Rs2.W[x].H[1]; // SMTT16
 * for RV32: x=0,
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_SMBT16(unsigned long a, unsigned long b)
{
    /* Signed destination: bottom half of a's elements times top half of b's. */
    long rd;

    /* %0 = output register, %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("smbt16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.113.2. SMBT16 ===== */

/* ===== Inline Function Start for 3.113.3. SMTT16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
 * \brief SMTT16 (SIMD Signed Multiply Top Half & Top Half)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMBB16 Rd, Rs1, Rs2
 * SMBT16 Rd, Rs1, Rs2
 * SMTT16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 16-bit content of the 32-bit elements of a register with the signed
 * 16-bit content of the 32-bit elements of another register and write the result to a third register.
 * * SMBB16: W[x].bottom * W[x].bottom
 * * SMBT16: W[x].bottom * W[x].top
 * * SMTT16: W[x].top * W[x].top
 *
 * **Description**:\n
 * For the `SMBB16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2.
 * For the `SMBT16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the top 16-bit content of the 32-bit elements of Rs2.
 * For the `SMTT16` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
 * the top 16-bit content of the 32-bit elements of Rs2.
 * The multiplication results are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed
 * integers.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[0]; // SMBB16
 * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[1]; // SMBT16
 * Rd.W[x] = Rs1.W[x].H[1] * Rs2.W[x].H[1]; // SMTT16
 * for RV32: x=0,
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_SMTT16(unsigned long a, unsigned long b)
{
    /* Signed destination: holds the top-half x top-half product(s). */
    long rd;

    /* %0 = output register, %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("smtt16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.113.3. SMTT16 ===== */

/* ===== Inline Function Start for 3.114.1. SMDS ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
 * \brief SMDS (SIMD Signed Multiply Two Halfs and Subtract)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMDS Rd, Rs1, Rs2
 * SMDRS Rd, Rs1, Rs2
 * SMXDS Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
 * perform a subtraction operation between the two 32-bit results.
 * * SMDS: top*top - bottom*bottom (per 32-bit element)
 * * SMDRS: bottom*bottom - top*top (per 32-bit element)
 * * SMXDS: top*bottom - bottom*top (per 32-bit element)
 *
 * **Description**:\n
 * For the `SMDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with
 * the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result
 * of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the
 * 32-bit elements of Rs2.
 * For the `SMDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
 * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of
 * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of
 * the 32-bit elements of Rs2.
 * For the `SMXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
 * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit
 * content of the 32-bit elements of Rs2.
 * The subtraction result is written to the corresponding 32-bit element of Rd. The 16-bit contents of
 * multiplication are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * * SMDS:
 * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
 * * SMDRS:
 * Rd.W[x] = (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]);
 * * SMXDS:
 * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_SMDS(unsigned long a, unsigned long b)
{
    /* Signed destination: top*top minus bottom*bottom per 32-bit element. */
    long rd;

    /* %0 = output register, %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("smds %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.114.1. SMDS ===== */

/* ===== Inline Function Start for 3.114.2. SMDRS ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
 * \brief SMDRS (SIMD Signed Multiply Two Halfs and Reverse Subtract)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMDS Rd, Rs1, Rs2
 * SMDRS Rd, Rs1, Rs2
 * SMXDS Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
 * perform a subtraction operation between the two 32-bit results.
 * * SMDS: top*top - bottom*bottom (per 32-bit element)
 * * SMDRS: bottom*bottom - top*top (per 32-bit element)
 * * SMXDS: top*bottom - bottom*top (per 32-bit element)
 *
 * **Description**:\n
 * For the `SMDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with
 * the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result
 * of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the
 * 32-bit elements of Rs2.
 * For the `SMDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
 * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of
 * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of
 * the 32-bit elements of Rs2.
 * For the `SMXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
 * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit
 * content of the 32-bit elements of Rs2.
 * The subtraction result is written to the corresponding 32-bit element of Rd. The 16-bit contents of
 * multiplication are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * * SMDS:
 * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
 * * SMDRS:
 * Rd.W[x] = (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]);
 * * SMXDS:
 * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_SMDRS(unsigned long a, unsigned long b)
{
    /* Signed destination: bottom*bottom minus top*top per 32-bit element. */
    long rd;

    /* %0 = output register, %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("smdrs %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.114.2. SMDRS ===== */

/* ===== Inline Function Start for 3.114.3. SMXDS ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
 * \brief SMXDS (SIMD Signed Crossed Multiply Two Halfs and Subtract)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMDS Rd, Rs1, Rs2
 * SMDRS Rd, Rs1, Rs2
 * SMXDS Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
 * perform a subtraction operation between the two 32-bit results.
 * * SMDS: top*top - bottom*bottom (per 32-bit element)
 * * SMDRS: bottom*bottom - top*top (per 32-bit element)
 * * SMXDS: top*bottom - bottom*top (per 32-bit element)
 *
 * **Description**:\n
 * For the `SMDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with
 * the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result
 * of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the
 * 32-bit elements of Rs2.
 * For the `SMDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
 * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of
 * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of
 * the 32-bit elements of Rs2.
 * For the `SMXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
 * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit
 * content of the 32-bit elements of Rs2.
 * The subtraction result is written to the corresponding 32-bit element of Rd. The 16-bit contents of
 * multiplication are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * * SMDS:
 * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
 * * SMDRS:
 * Rd.W[x] = (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]);
 * * SMXDS:
 * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_SMXDS(unsigned long a, unsigned long b)
{
    /* Signed destination: crossed form, top*bottom minus bottom*top. */
    long rd;

    /* %0 = output register, %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("smxds %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.114.3. SMXDS ===== */

/* ===== Inline Function Start for 3.115. SMIN8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
 * \brief SMIN8 (SIMD 8-bit Signed Minimum)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMIN8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit signed integer elements finding minimum operations simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 8-bit signed integer elements in Rs1 with the 8-bit
 * signed integer elements in Rs2 and, for each pair, selects the element that is less. The selected
 * results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.B[x] = (Rs1.B[x] < Rs2.B[x])? Rs1.B[x] : Rs2.B[x];
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SMIN8(unsigned long a, unsigned long b)
{
    /* Receives the packed per-byte minima produced by the instruction. */
    unsigned long rd;

    /* %0 is the output register, %1/%2 are the two packed inputs. */
    __ASM volatile("smin8 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.115. SMIN8 ===== */

/* ===== Inline Function Start for 3.116. SMIN16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
 * \brief SMIN16 (SIMD 16-bit Signed Minimum)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMIN16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer elements finding minimum operations simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 16-bit signed integer elements in Rs1 with the 16-bit
 * signed integer elements in Rs2 and, for each pair, selects the element that is less. The selected
 * results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = (Rs1.H[x] < Rs2.H[x])? Rs1.H[x] : Rs2.H[x];
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SMIN16(unsigned long a, unsigned long b)
{
    /* Receives the packed per-halfword minima produced by the instruction. */
    unsigned long rd;

    /* %0 is the output register, %1/%2 are the two packed inputs. */
    __ASM volatile("smin16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.116. SMIN16 ===== */

/* ===== Inline Function Start for 3.117.1. SMMUL ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
 * \brief SMMUL (SIMD MSW Signed Multiply Word)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMMUL Rd, Rs1, Rs2
 * SMMUL.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the 32-bit signed integer elements of two registers and write the most significant
 * 32-bit results to the corresponding 32-bit elements of a register. The `.u` form performs an
 * additional rounding up operation on the multiplication results before taking the most significant
 * 32-bit part of the results.
 *
 * **Description**:\n
 * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2 and writes the
 * most significant 32-bit multiplication results to the corresponding 32-bit elements of Rd. The 32-bit
 * elements of Rs1 and Rs2 are treated as signed integers. The `.u` form of the instruction rounds up
 * the most significant 32-bit of the 64-bit multiplication results by adding a 1 to bit 31 of the results.
 * * For `smmul/RV32` instruction, it is an alias to `mulh/RV32` instruction.
 *
 * **Operations**:\n
 * ~~~
 * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
 * if (`.u` form) {
 *   Round[x][32:0] = Mres[x][63:31] + 1;
 *   Rd.W[x] = Round[x][32:1];
 * } else {
 *   Rd.W[x] = Mres[x][63:32];
 * }
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_SMMUL(long a, long b)
{
    /* Signed destination: most-significant word(s) of the 32x32 product(s). */
    long rd;

    /* Non-rounding form; %0 = output, %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("smmul %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.117.1. SMMUL ===== */

/* ===== Inline Function Start for 3.117.2. SMMUL.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
 * \brief SMMUL.u (SIMD MSW Signed Multiply Word with Rounding)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMMUL Rd, Rs1, Rs2
 * SMMUL.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the 32-bit signed integer elements of two registers and write the most significant
 * 32-bit results to the corresponding 32-bit elements of a register. The `.u` form performs an
 * additional rounding up operation on the multiplication results before taking the most significant
 * 32-bit part of the results.
 *
 * **Description**:\n
 * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2 and writes the
 * most significant 32-bit multiplication results to the corresponding 32-bit elements of Rd. The 32-bit
 * elements of Rs1 and Rs2 are treated as signed integers. The `.u` form of the instruction rounds up
 * the most significant 32-bit of the 64-bit multiplication results by adding a 1 to bit 31 of the results.
 * * For `smmul/RV32` instruction, it is an alias to `mulh/RV32` instruction.
 *
 * **Operations**:\n
 * ~~~
 * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
 * if (`.u` form) {
 *   Round[x][32:0] = Mres[x][63:31] + 1;
 *   Rd.W[x] = Round[x][32:1];
 * } else {
 *   Rd.W[x] = Mres[x][63:32];
 * }
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_SMMUL_U(long a, long b)
{
    /* Signed destination: rounded most-significant word(s) of the product(s). */
    long rd;

    /* `.u` (rounding) form; %0 = output, %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("smmul.u %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.117.2. SMMUL.u ===== */

/* ===== Inline Function Start for 3.118.1. SMMWB ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
 * \brief SMMWB (SIMD MSW Signed Multiply Word and Bottom Half)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMMWB Rd, Rs1, Rs2
 * SMMWB.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the
 * corresponding 32-bit elements of another register, and write the most significant 32-bit results to
 * the corresponding 32-bit elements of a register. The `.u` form rounds up the results from the most
 * significant discarded bit.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit elements of Rs1 with the signed bottom 16-bit content
 * of the corresponding 32-bit elements of Rs2 and writes the most significant 32-bit multiplication
 * results to the corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the
 * most significant 32-bit of the 48-bit multiplication results by adding a 1 to bit 15 of the results.
 *
 * **Operations**:\n
 * ~~~
 * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[0];
 * if (`.u` form) {
 *   Round[x][32:0] = Mres[x][47:15] + 1;
 *   Rd.W[x] = Round[x][32:1];
 * } else {
 *   Rd.W[x] = Mres[x][47:16];
 * }
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_SMMWB(long a, unsigned long b)
{
    /* Signed destination: upper 32 bits of word(a) x bottom-half(b). */
    long rd;

    /* Non-rounding form; %0 = output, %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("smmwb %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.118.1. SMMWB ===== */

/* ===== Inline Function Start for 3.118.2. SMMWB.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
 * \brief SMMWB.u (SIMD MSW Signed Multiply Word and Bottom Half with Rounding)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMMWB Rd, Rs1, Rs2
 * SMMWB.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the
 * corresponding 32-bit elements of another register, and write the most significant 32-bit results to
 * the corresponding 32-bit elements of a register. The `.u` form rounds up the results from the most
 * significant discarded bit.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit elements of Rs1 with the signed bottom 16-bit content
 * of the corresponding 32-bit elements of Rs2 and writes the most significant 32-bit multiplication
 * results to the corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the
 * most significant 32-bit of the 48-bit multiplication results by adding a 1 to bit 15 of the results.
 *
 * **Operations**:\n
 * ~~~
 * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[0];
 * if (`.u` form) {
 *   Round[x][32:0] = Mres[x][47:15] + 1;
 *   Rd.W[x] = Round[x][32:1];
 * } else {
 *   Rd.W[x] = Mres[x][47:16];
 * }
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_SMMWB_U(long a, unsigned long b)
{
    /* Signed destination: rounded upper 32 bits of word(a) x bottom-half(b). */
    long rd;

    /* `.u` (rounding) form; %0 = output, %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("smmwb.u %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.118.2. SMMWB.u ===== */

/* ===== Inline Function Start for 3.119.1. SMMWT ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
 * \brief SMMWT (SIMD MSW Signed Multiply Word and Top Half)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMMWT Rd, Rs1, Rs2
 * SMMWT.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit integer elements of one register and the top 16-bit of the
 * corresponding 32-bit elements of another register, and write the most significant 32-bit results to
 * the corresponding 32-bit elements of a register. The `.u` form rounds up the results from the most
 * significant discarded bit.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit elements of Rs1 with the top signed 16-bit content of
 * the corresponding 32-bit elements of Rs2 and writes the most significant 32-bit multiplication
 * results to the corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the
 * most significant 32-bit of the 48-bit multiplication results by adding a 1 to bit 15 of the results.
 *
 * **Operations**:\n
 * ~~~
 * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[1];
 * if (`.u` form) {
 *   Round[x][32:0] = Mres[x][47:15] + 1;
 *   Rd.W[x] = Round[x][32:1];
 * } else {
 *   Rd.W[x] = Mres[x][47:16];
 * }
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_SMMWT(long a, unsigned long b)
{
    /* Signed destination: upper 32 bits of word(a) x top-half(b). */
    long rd;

    /* Non-rounding form; %0 = output, %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("smmwt %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.119.1. SMMWT ===== */

/* ===== Inline Function Start for 3.119.2. SMMWT.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
 * \brief SMMWT.u (SIMD MSW Signed Multiply Word and Top Half with Rounding)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMMWT Rd, Rs1, Rs2
 * SMMWT.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit integer elements of one register and the top 16-bit of the
 * corresponding 32-bit elements of another register, and write the most significant 32-bit results to
 * the corresponding 32-bit elements of a register. The `.u` form rounds up the results from the most
 * significant discarded bit.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit elements of Rs1 with the top signed 16-bit content of
 * the corresponding 32-bit elements of Rs2 and writes the most significant 32-bit multiplication
 * results to the corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the
 * most significant 32-bit of the 48-bit multiplication results by adding a 1 to bit 15 of the results.
 *
 * **Operations**:\n
 * ~~~
 * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[1];
 * if (`.u` form) {
 *   Round[x][32:0] = Mres[x][47:15] + 1;
 *   Rd.W[x] = Round[x][32:1];
 * } else {
 *   Rd.W[x] = Mres[x][47:16];
 * }
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_SMMWT_U(long a, unsigned long b)
{
    /* Signed destination: rounded upper 32 bits of word(a) x top-half(b). */
    long rd;

    /* `.u` (rounding) form; %0 = output, %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("smmwt.u %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.119.2. SMMWT.u ===== */

/* ===== Inline Function Start for 3.120.1. SMSLDA ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
 * \brief SMSLDA (Signed Multiply Two Halfs & Add & Subtract 64-bit)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * SMSLDA Rd, Rs1, Rs2
 * SMSLXDA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from the 32-bit elements of two registers, and then
 * subtract the two 32-bit results from the 64-bit value of an even/odd pair of registers (RV32) or a
 * register (RV64). The subtraction result is written back to the register-pair.
 * * SMSLDA: rd pair - top*top - bottom*bottom (all 32-bit elements)
 * * SMSLXDA: rd pair - top*bottom - bottom*top (all 32-bit elements)
 *
 * **RV32 Description**:\n
 * For the `SMSLDA` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
 * content of Rs2 and multiplies the top 16-bit content of Rs1 with the top 16-bit content of Rs2.
 * For the `SMSLXDA` instruction, it multiplies the top 16-bit content of Rs1 with the bottom 16-bit
 * content of Rs2 and multiplies the bottom 16-bit content of Rs1 with the top 16-bit content of Rs2.
 * The two multiplication results are subtracted from the 64-bit value of an even/odd pair of registers
 * specified by Rd(4,1). The 64-bit subtraction result is written back to the register-pair. The 16-bit
 * values of Rs1 and Rs2, and the 64-bit value of the register-pair are treated as signed integers.
 * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
 * of the pair contains the low 32-bit of the result.
 *
 * **RV64 Description**:\n
 * For the `SMSLDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of
 * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2.
 * For the `SMSLXDA` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
 * the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the bottom 16-bit content of
 * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2.
 * The four multiplication results are subtracted from the 64-bit value of Rd. The 64-bit subtraction
 * result is written back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated
 * as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * // SMSLDA
 * Mres0[31:0] = (Rs1.H[0] * Rs2.H[0]);
 * Mres1[31:0] = (Rs1.H[1] * Rs2.H[1]);
 * // SMSLXDA
 * Mres0[31:0] = (Rs1.H[0] * Rs2.H[1]);
 * Mres1[31:0] = (Rs1.H[1] * Rs2.H[0]);
 * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
 * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] - SE64(Mres0[31:0]) - SE64(Mres1[31:0]);
 * * RV64:
 * // SMSLDA
 * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
 * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
 * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
 * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
 * // SMSLXDA
 * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
 * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]);
 * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
 * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]);
 * Rd = Rd - SE64(Mres0[0][31:0]) - SE64(Mres1[0][31:0]) - SE64(Mres0[1][31:0]) -
 * SE64(Mres1[1][31:0]);
 * ~~~
 *
 * \param [in]  t    long long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_SMSLDA(long long t, unsigned long a, unsigned long b)
{
    /* 64-bit accumulator, seeded from t. It is bound as a read-write ("+r")
     * operand, so the instruction both consumes and overwrites it. */
    long long acc = t;

    /* %0 = accumulator (in/out), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("smslda %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));

    return acc;
}
/* ===== Inline Function End for 3.120.1. SMSLDA ===== */

/* ===== Inline Function Start for 3.120.2. SMSLXDA ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
 * \brief SMSLXDA (Signed Crossed Multiply Two Halfs & Add & Subtract 64-bit)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * SMSLDA Rd, Rs1, Rs2
 * SMSLXDA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from the 32-bit elements of two registers, and then
 * subtract the two 32-bit results from the 64-bit value of an even/odd pair of registers (RV32) or a
 * register (RV64). The subtraction result is written back to the register-pair.
 * * SMSLDA: rd pair - top*top - bottom*bottom (all 32-bit elements)
 * * SMSLXDA: rd pair - top*bottom - bottom*top (all 32-bit elements)
 *
 * **RV32 Description**:\n
 * For the `SMSLDA` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
 * content of Rs2 and multiplies the top 16-bit content of Rs1 with the top 16-bit content of Rs2.
 * For the `SMSLXDA` instruction, it multiplies the top 16-bit content of Rs1 with the bottom 16-bit
 * content of Rs2 and multiplies the bottom 16-bit content of Rs1 with the top 16-bit content of Rs2.
 * The two multiplication results are subtracted from the 64-bit value of an even/odd pair of registers
 * specified by Rd(4,1). The 64-bit subtraction result is written back to the register-pair. The 16-bit
 * values of Rs1 and Rs2, and the 64-bit value of the register-pair are treated as signed integers.
 * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
 * of the pair contains the low 32-bit of the result.
 *
 * **RV64 Description**:\n
 * For the `SMSLDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of
 * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2.
 * For the `SMSLXDA` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
 * the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the bottom 16-bit content of
 * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2.
 * The four multiplication results are subtracted from the 64-bit value of Rd. The 64-bit subtraction
 * result is written back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated
 * as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * // SMSLDA
 * Mres0[31:0] = (Rs1.H[0] * Rs2.H[0]);
 * Mres1[31:0] = (Rs1.H[1] * Rs2.H[1]);
 * // SMSLXDA
 * Mres0[31:0] = (Rs1.H[0] * Rs2.H[1]);
 * Mres1[31:0] = (Rs1.H[1] * Rs2.H[0]);
 * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
 * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] - SE64(Mres0[31:0]) - SE64(Mres1[31:0]);
 * * RV64:
 * // SMSLDA
 * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
 * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
 * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
 * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
 * // SMSLXDA
 * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
 * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]);
 * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
 * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]);
 * Rd = Rd - SE64(Mres0[0][31:0]) - SE64(Mres1[0][31:0]) - SE64(Mres0[1][31:0]) -
 * SE64(Mres1[1][31:0]);
 * ~~~
 *
 * \param [in]  t    long long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_SMSLXDA(long long t, unsigned long a, unsigned long b)
{
    /*
     * `t` is copied into a local that the instruction both reads and
     * overwrites ("+r" operand); `a` and `b` are the two register sources.
     */
    long long acc = t;

    __ASM volatile(
        "smslxda %0, %1, %2"
        : "+r"(acc)
        : "r"(a), "r"(b));

    return acc;
}
/* ===== Inline Function End for 3.120.2. SMSLXDA ===== */

/* ===== Inline Function Start for 3.121. SMSR64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
 * \brief SMSR64 (Signed Multiply and Subtract from 64-Bit Data)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * SMSR64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the 32-bit signed elements in two registers and subtract the 64-bit multiplication
 * results from the 64-bit signed data of a pair of registers (RV32) or a register (RV64). The result is
 * written back to the pair of registers (RV32) or a register (RV64).
 *
 * **RV32 Description**:\n
 * This instruction multiplies the 32-bit signed data of Rs1 with that of Rs2. It
 * subtracts the 64-bit multiplication result from the 64-bit signed data of an even/odd pair of registers
 * specified by Rd(4,1). The subtraction result is written back to the even/odd pair of registers
 * specified by Rd(4,1).
 * Rd(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
 * of the pair contains the low 32-bit of the result.
 *
 * **RV64 Description**:\n
 * This instruction multiplies the 32-bit signed elements of Rs1 with that of Rs2. It
 * subtracts the 64-bit multiplication results from the 64-bit signed data of Rd. The subtraction result is
 * written back to Rd.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * R[t_H].R[t_L] = R[t_H].R[t_L] - (Rs1 * Rs2);
 * * RV64:
 * Rd = Rd - (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]);
 * ~~~
 *
 * \param [in]  t    long long type of value stored in t
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_SMSR64(long long t, long a, long b)
{
    /*
     * The accumulator is a read-write ("+r") operand: it starts as `t` and
     * holds the value returned to the caller once `smsr64` has executed.
     */
    long long acc = t;

    __ASM volatile(
        "smsr64 %0, %1, %2"
        : "+r"(acc)
        : "r"(a), "r"(b));

    return acc;
}
/* ===== Inline Function End for 3.121. SMSR64 ===== */

/* ===== Inline Function Start for 3.122.1. SMUL8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY
 * \brief SMUL8 (SIMD Signed 8-bit Multiply)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMUL8 Rd, Rs1, Rs2
 * SMULX8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do signed 8-bit multiplications and generate four 16-bit results simultaneously.
 *
 * **RV32 Description**:\n
 * For the `SMUL8` instruction, multiply the 8-bit data elements of Rs1 with the
 * corresponding 8-bit data elements of Rs2.
 * For the `SMULX8` instruction, multiply the first and second 8-bit data elements of Rs1 with the
 * second and first 8-bit data elements of Rs2. At the same time, multiply the third and fourth 8-bit data
 * elements of Rs1 with the fourth and third 8-bit data elements of Rs2.
 * The four 16-bit results are then written into an even/odd pair of registers specified by Rd(4,1).
 * Rd(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the two 16-bit results calculated from the top part of
 * Rs1 and the even `2d` register of the pair contains the two 16-bit results calculated from the bottom
 * part of Rs1.
 *
 * **RV64 Description**:\n
 * For the `SMUL8` instruction, multiply the 8-bit data elements of Rs1 with the
 * corresponding 8-bit data elements of Rs2.
 * For the `SMULX8` instruction, multiply the first and second 8-bit data elements of Rs1 with the
 * second and first 8-bit data elements of Rs2. At the same time, multiply the third and fourth 8-bit data
 * elements of Rs1 with the fourth and third 8-bit data elements of Rs2.
 * The four 16-bit results are then written into Rd. The Rd.W[1] contains the two 16-bit results
 * calculated from the top part of Rs1 and the Rd.W[0] contains the two 16-bit results calculated from
 * the bottom part of Rs1.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * if (is `SMUL8`) {
 *   op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
 *   op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
 * } else if (is `SMULX8`) {
 *   op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
 *   op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
 * }
 * rest[x/2] = op1t[x/2] s* op2t[x/2];
 * resb[x/2] = op1b[x/2] s* op2b[x/2];
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * R[t_H].H[1] = rest[1]; R[t_H].H[0] = resb[1];
 * R[t_L].H[1] = rest[0]; R[t_L].H[0] = resb[0];
 * x = 0 and 2
 * * RV64:
 * if (is `SMUL8`) {
 *   op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
 *   op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
 * } else if (is `SMULX8`) {
 *   op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
 *   op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
 * }
 * rest[x/2] = op1t[x/2] s* op2t[x/2];
 * resb[x/2] = op1b[x/2] s* op2b[x/2];
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * Rd.W[1].H[1] = rest[1]; Rd.W[1].H[0] = resb[1];
 * Rd.W[0].H[1] = rest[0]; Rd.W[0].H[0] = resb[0];
 * x = 0 and 2
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_SMUL8(unsigned int a, unsigned int b)
{
    /* Write-only destination of `smul8`; `a` and `b` are the register sources. */
    unsigned long long packed;

    __ASM volatile(
        "smul8 %0, %1, %2"
        : "=r"(packed)
        : "r"(a), "r"(b));

    return packed;
}
/* ===== Inline Function End for 3.122.1. SMUL8 ===== */

/* ===== Inline Function Start for 3.122.2. SMULX8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY
 * \brief SMULX8 (SIMD Signed Crossed 8-bit Multiply)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMUL8 Rd, Rs1, Rs2
 * SMULX8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do signed 8-bit multiplications and generate four 16-bit results simultaneously.
 *
 * **RV32 Description**:\n
 * For the `SMUL8` instruction, multiply the 8-bit data elements of Rs1 with the
 * corresponding 8-bit data elements of Rs2.
 * For the `SMULX8` instruction, multiply the first and second 8-bit data elements of Rs1 with the
 * second and first 8-bit data elements of Rs2. At the same time, multiply the third and fourth 8-bit data
 * elements of Rs1 with the fourth and third 8-bit data elements of Rs2.
 * The four 16-bit results are then written into an even/odd pair of registers specified by Rd(4,1).
 * Rd(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the two 16-bit results calculated from the top part of
 * Rs1 and the even `2d` register of the pair contains the two 16-bit results calculated from the bottom
 * part of Rs1.
 *
 * **RV64 Description**:\n
 * For the `SMUL8` instruction, multiply the 8-bit data elements of Rs1 with the
 * corresponding 8-bit data elements of Rs2.
 * For the `SMULX8` instruction, multiply the first and second 8-bit data elements of Rs1 with the
 * second and first 8-bit data elements of Rs2. At the same time, multiply the third and fourth 8-bit data
 * elements of Rs1 with the fourth and third 8-bit data elements of Rs2.
 * The four 16-bit results are then written into Rd. The Rd.W[1] contains the two 16-bit results
 * calculated from the top part of Rs1 and the Rd.W[0] contains the two 16-bit results calculated from
 * the bottom part of Rs1.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * if (is `SMUL8`) {
 *   op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
 *   op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
 * } else if (is `SMULX8`) {
 *   op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
 *   op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
 * }
 * rest[x/2] = op1t[x/2] s* op2t[x/2];
 * resb[x/2] = op1b[x/2] s* op2b[x/2];
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * R[t_H].H[1] = rest[1]; R[t_H].H[0] = resb[1];
 * R[t_L].H[1] = rest[0]; R[t_L].H[0] = resb[0];
 * x = 0 and 2
 * * RV64:
 * if (is `SMUL8`) {
 *   op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
 *   op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
 * } else if (is `SMULX8`) {
 *   op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
 *   op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
 * }
 * rest[x/2] = op1t[x/2] s* op2t[x/2];
 * resb[x/2] = op1b[x/2] s* op2b[x/2];
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * Rd.W[1].H[1] = rest[1]; Rd.W[1].H[0] = resb[1];
 * Rd.W[0].H[1] = rest[0]; Rd.W[0].H[0] = resb[0];
 * x = 0 and 2
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_SMULX8(unsigned int a, unsigned int b)
{
    /* Write-only destination of `smulx8`; `a` and `b` are the register sources. */
    unsigned long long packed;

    __ASM volatile(
        "smulx8 %0, %1, %2"
        : "=r"(packed)
        : "r"(a), "r"(b));

    return packed;
}
/* ===== Inline Function End for 3.122.2. SMULX8 ===== */

/* ===== Inline Function Start for 3.123.1. SMUL16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY
 * \brief SMUL16 (SIMD Signed 16-bit Multiply)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMUL16 Rd, Rs1, Rs2
 * SMULX16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do signed 16-bit multiplications and generate two 32-bit results simultaneously.
 *
 * **RV32 Description**:\n
 * For the `SMUL16` instruction, multiply the top 16-bit Q15 content of Rs1 with
 * the top 16-bit Q15 content of Rs2. At the same time, multiply the bottom 16-bit Q15 content of Rs1
 * with the bottom 16-bit Q15 content of Rs2.
 * For the `SMULX16` instruction, multiply the top 16-bit Q15 content of Rs1 with the bottom 16-bit
 * Q15 content of Rs2. At the same time, multiply the bottom 16-bit Q15 content of Rs1 with the top
 * 16-bit Q15 content of Rs2.
 * The two Q30 results are then written into an even/odd pair of registers specified by Rd(4,1). Rd(4,1),
 * i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair includes
 * register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the 32-bit result calculated from the top part of Rs1 and
 * the even `2d` register of the pair contains the 32-bit result calculated from the bottom part of Rs1.
 *
 * **RV64 Description**:\n
 * For the `SMUL16` instruction, multiply the top 16-bit Q15 content of the lower
 * 32-bit word in Rs1 with the top 16-bit Q15 content of the lower 32-bit word in Rs2. At the same time,
 * multiply the bottom 16-bit Q15 content of the lower 32-bit word in Rs1 with the bottom 16-bit Q15
 * content of the lower 32-bit word in Rs2.
 * For the `SMULX16` instruction, multiply the top 16-bit Q15 content of the lower 32-bit word in Rs1
 * with the bottom 16-bit Q15 content of the lower 32-bit word in Rs2. At the same time, multiply the
 * bottom 16-bit Q15 content of the lower 32-bit word in Rs1 with the top 16-bit Q15 content of the
 * lower 32-bit word in Rs2.
 * The two 32-bit Q30 results are then written into Rd. The result calculated from the top 16-bit of the
 * lower 32-bit word in Rs1 is written to Rd.W[1]. And the result calculated from the bottom 16-bit of
 * the lower 32-bit word in Rs1 is written to Rd.W[0].
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * if (is `SMUL16`) {
 *   op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
 *   op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
 * } else if (is `SMULX16`) {
 *   op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
 *   op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
 * }
 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
 *   res = aop s* bop;
 * }
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * R[t_H] = rest;
 * R[t_L] = resb;
 * * RV64:
 * if (is `SMUL16`) {
 *   op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
 *   op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
 * } else if (is `SMULX16`) {
 *   op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
 *   op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
 * }
 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
 *   res = aop s* bop;
 * }
 * Rd.W[1] = rest;
 * Rd.W[0] = resb;
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_SMUL16(unsigned int a, unsigned int b)
{
    /* Write-only destination of `smul16`; `a` and `b` are the register sources. */
    unsigned long long packed;

    __ASM volatile(
        "smul16 %0, %1, %2"
        : "=r"(packed)
        : "r"(a), "r"(b));

    return packed;
}
/* ===== Inline Function End for 3.123.1. SMUL16 ===== */

/* ===== Inline Function Start for 3.123.2. SMULX16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY
 * \brief SMULX16 (SIMD Signed Crossed 16-bit Multiply)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMUL16 Rd, Rs1, Rs2
 * SMULX16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do signed 16-bit multiplications and generate two 32-bit results simultaneously.
 *
 * **RV32 Description**:\n
 * For the `SMUL16` instruction, multiply the top 16-bit Q15 content of Rs1 with
 * the top 16-bit Q15 content of Rs2. At the same time, multiply the bottom 16-bit Q15 content of Rs1
 * with the bottom 16-bit Q15 content of Rs2.
 * For the `SMULX16` instruction, multiply the top 16-bit Q15 content of Rs1 with the bottom 16-bit
 * Q15 content of Rs2. At the same time, multiply the bottom 16-bit Q15 content of Rs1 with the top
 * 16-bit Q15 content of Rs2.
 * The two Q30 results are then written into an even/odd pair of registers specified by Rd(4,1). Rd(4,1),
 * i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair includes
 * register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the 32-bit result calculated from the top part of Rs1 and
 * the even `2d` register of the pair contains the 32-bit result calculated from the bottom part of Rs1.
 *
 * **RV64 Description**:\n
 * For the `SMUL16` instruction, multiply the top 16-bit Q15 content of the lower
 * 32-bit word in Rs1 with the top 16-bit Q15 content of the lower 32-bit word in Rs2. At the same time,
 * multiply the bottom 16-bit Q15 content of the lower 32-bit word in Rs1 with the bottom 16-bit Q15
 * content of the lower 32-bit word in Rs2.
 * For the `SMULX16` instruction, multiply the top 16-bit Q15 content of the lower 32-bit word in Rs1
 * with the bottom 16-bit Q15 content of the lower 32-bit word in Rs2. At the same time, multiply the
 * bottom 16-bit Q15 content of the lower 32-bit word in Rs1 with the top 16-bit Q15 content of the
 * lower 32-bit word in Rs2.
 * The two 32-bit Q30 results are then written into Rd. The result calculated from the top 16-bit of the
 * lower 32-bit word in Rs1 is written to Rd.W[1]. And the result calculated from the bottom 16-bit of
 * the lower 32-bit word in Rs1 is written to Rd.W[0].
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * if (is `SMUL16`) {
 *   op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
 *   op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
 * } else if (is `SMULX16`) {
 *   op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
 *   op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
 * }
 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
 *   res = aop s* bop;
 * }
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * R[t_H] = rest;
 * R[t_L] = resb;
 * * RV64:
 * if (is `SMUL16`) {
 *   op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
 *   op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
 * } else if (is `SMULX16`) {
 *   op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
 *   op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
 * }
 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
 *   res = aop s* bop;
 * }
 * Rd.W[1] = rest;
 * Rd.W[0] = resb;
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_SMULX16(unsigned int a, unsigned int b)
{
    /* Write-only destination of `smulx16`; `a` and `b` are the register sources. */
    unsigned long long packed;

    __ASM volatile(
        "smulx16 %0, %1, %2"
        : "=r"(packed)
        : "r"(a), "r"(b));

    return packed;
}
/* ===== Inline Function End for 3.123.2. SMULX16 ===== */

/* ===== Inline Function Start for 3.124. SRA.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
 * \brief SRA.u (Rounding Shift Right Arithmetic)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * SRA.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Perform an arithmetic right shift operation with rounding. The shift amount is a variable
 * from a GPR.
 *
 * **Description**:\n
 * This instruction right-shifts the content of Rs1 arithmetically. The shifted out bits are
 * filled with the sign-bit and the shift amount is specified by the low-order 5-bits (RV32) or 6-bits
 * (RV64) of the Rs2 register. For the rounding operation, a value of 1 is added to the most significant
 * discarded bit of the data to calculate the final result. And the result is written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * sa = Rs2[4:0];
 * if (sa > 0) {
 *   res[31:-1] = SE33(Rs1[31:(sa-1)]) + 1;
 *   Rd = res[31:0];
 * } else {
 *   Rd = Rs1;
 * }
 * * RV64:
 * sa = Rs2[5:0];
 * if (sa > 0) {
 *   res[63:-1] = SE65(Rs1[63:(sa-1)]) + 1;
 *   Rd = res[63:0];
 * } else {
 *   Rd = Rs1;
 * }
 * ~~~
 *
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_SRA_U(long a, unsigned int b)
{
    /* Destination of `sra.u`; `a` is the value operand, `b` the register shift amount. */
    long shifted;

    __ASM volatile(
        "sra.u %0, %1, %2"
        : "=r"(shifted)
        : "r"(a), "r"(b));

    return shifted;
}
/* ===== Inline Function End for 3.124. SRA.u ===== */

/* ===== Inline Function Start for 3.125. SRAI.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
 * \brief SRAI.u (Rounding Shift Right Arithmetic Immediate)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * SRAI.u Rd, Rs1, imm6u[4:0] (RV32)
 * SRAI.u Rd, Rs1, imm6u[5:0] (RV64)
 * ~~~
 *
 * **Purpose**:\n
 * Perform an arithmetic right shift operation with rounding. The shift amount is an
 * immediate value.
 *
 * **Description**:\n
 * This instruction right-shifts the content of Rs1 arithmetically. The shifted out bits are
 * filled with the sign-bit and the shift amount is specified by the imm6u[4:0] (RV32) or imm6u[5:0]
 * (RV64) constant. For the rounding operation, a value of 1 is added to the most significant discarded
 * bit of the data to calculate the final result. And the result is written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * sa = imm6u[4:0];
 * if (sa > 0) {
 *   res[31:-1] = SE33(Rs1[31:(sa-1)]) + 1;
 *   Rd = res[31:0];
 * } else {
 *   Rd = Rs1;
 * }
 * * RV64:
 * sa = imm6u[5:0];
 * if (sa > 0) {
 *   res[63:-1] = SE65(Rs1[63:(sa-1)]) + 1;
 *   Rd = res[63:0];
 * } else {
 *   Rd = Rs1;
 * }
 * ~~~
 *
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in long type
 */
/*
 * Statement-expression wrapper around the `srai.u` instruction.
 *   a - value operand; converted to long and passed in a register.
 *   b - shift amount; must satisfy the "K" operand constraint, i.e. it has to
 *       be a compile-time immediate.
 * The macro evaluates to the long left in the destination operand.
 *
 * `__a` is initialised before the result variable is declared, so an argument
 * expression that mentions a caller variable named `result`
 * (e.g. `result = __RV_SRAI_U(result, 1)`) refers to the caller's variable
 * and not to an uninitialised local of this macro.
 *
 * NOTE(review): GCC documents the RISC-V "K" constraint as a 5-bit unsigned
 * immediate, while the syntax listed for RV64 is imm6u[5:0] - confirm whether
 * shift amounts 32..63 are accepted on RV64.
 */
#define __RV_SRAI_U(a, b)    \
    ({    \
        long __a = (long)(a);    \
        long __res;    \
        __ASM volatile("srai.u %0, %1, %2" : "=r"(__res) : "r"(__a), "K"(b));    \
        __res;    \
    })
/* ===== Inline Function End for 3.125. SRAI.u ===== */

/* ===== Inline Function Start for 3.126.1. SRA8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
 * \brief SRA8 (SIMD 8-bit Shift Right Arithmetic)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SRA8 Rd, Rs1, Rs2
 * SRA8.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit element arithmetic right shift operations simultaneously. The shift amount is a
 * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
 * results.
 *
 * **Description**:\n
 * The 8-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
 * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order
 * 3-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is
 * added to the most significant discarded bit of each 8-bit data element to calculate the final results.
 * And the results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = Rs2[2:0];
 * if (sa > 0) {
 *   if (`.u` form) { // SRA8.u
 *     res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1;
 *     Rd.B[x] = res[7:0];
 *   } else { // SRA8
 *     Rd.B[x] = SE8(Rs1.B[x][7:sa])
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SRA8(unsigned long a, unsigned int b)
{
    /* Destination of `sra8`; `a` is the value operand, `b` the register shift amount. */
    unsigned long shifted;

    __ASM volatile(
        "sra8 %0, %1, %2"
        : "=r"(shifted)
        : "r"(a), "r"(b));

    return shifted;
}
/* ===== Inline Function End for 3.126.1. SRA8 ===== */

/* ===== Inline Function Start for 3.126.2. SRA8.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
 * \brief SRA8.u (SIMD 8-bit Rounding Shift Right Arithmetic)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SRA8 Rd, Rs1, Rs2
 * SRA8.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit element arithmetic right shift operations simultaneously. The shift amount is a
 * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
 * results.
 *
 * **Description**:\n
 * The 8-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
 * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order
 * 3-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is
 * added to the most significant discarded bit of each 8-bit data element to calculate the final results.
 * And the results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = Rs2[2:0];
 * if (sa > 0) {
 *   if (`.u` form) { // SRA8.u
 *     res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1;
 *     Rd.B[x] = res[7:0];
 *   } else { // SRA8
 *     Rd.B[x] = SE8(Rs1.B[x][7:sa])
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SRA8_U(unsigned long a, unsigned int b)
{
    /* Destination of `sra8.u`; `a` is the value operand, `b` the register shift amount. */
    unsigned long shifted;

    __ASM volatile(
        "sra8.u %0, %1, %2"
        : "=r"(shifted)
        : "r"(a), "r"(b));

    return shifted;
}
/* ===== Inline Function End for 3.126.2. SRA8.u ===== */

/* ===== Inline Function Start for 3.127.1. SRAI8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
 * \brief SRAI8 (SIMD 8-bit Shift Right Arithmetic Immediate)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SRAI8 Rd, Rs1, imm3u
 * SRAI8.u Rd, Rs1, imm3u
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit element arithmetic right shift operations simultaneously. The shift amount is an
 * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
 *
 * **Description**:\n
 * The 8-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
 * bits are filled with the sign-bit of the data elements. The shift amount is specified by the imm3u
 * constant. For the rounding operation of the `.u` form, a value of 1 is added to the most significant
 * discarded bit of each 8-bit data element to calculate the final results. And the results are written to
 * Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = imm3u[2:0];
 * if (sa > 0) {
 *   if (`.u` form) { // SRAI8.u
 *     res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1;
 *     Rd.B[x] = res[7:0];
 *   } else { // SRAI8
 *     Rd.B[x] = SE8(Rs1.B[x][7:sa])
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
/*
 * Statement-expression wrapper around the `srai8` instruction.
 *   a - value operand; converted to unsigned long and passed in a register.
 *   b - shift amount; must satisfy the "K" operand constraint, i.e. it has to
 *       be a compile-time immediate.
 * The macro evaluates to the unsigned long left in the destination operand.
 *
 * `__a` is initialised before the result variable is declared, so an argument
 * expression that mentions a caller variable named `result`
 * (e.g. `result = __RV_SRAI8(result, 1)`) refers to the caller's variable
 * and not to an uninitialised local of this macro.
 */
#define __RV_SRAI8(a, b)    \
    ({    \
        unsigned long __a = (unsigned long)(a);    \
        unsigned long __res;    \
        __ASM volatile("srai8 %0, %1, %2" : "=r"(__res) : "r"(__a), "K"(b));    \
        __res;    \
    })
/* ===== Inline Function End for 3.127.1. SRAI8 ===== */

/* ===== Inline Function Start for 3.127.2. SRAI8.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
 * \brief SRAI8.u (SIMD 8-bit Rounding Shift Right Arithmetic Immediate)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SRAI8 Rd, Rs1, imm3u
 * SRAI8.u Rd, Rs1, imm3u
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit element arithmetic right shift operations simultaneously. The shift amount is an
 * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
 *
 * **Description**:\n
 * The 8-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
 * bits are filled with the sign-bit of the data elements. The shift amount is specified by the imm3u
 * constant. For the rounding operation of the `.u` form, a value of 1 is added to the most significant
 * discarded bit of each 8-bit data element to calculate the final results. And the results are written to
 * Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = imm3u[2:0];
 * if (sa > 0) {
 *   if (`.u` form) { // SRAI8.u
 *     res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1;
 *     Rd.B[x] = res[7:0];
 *   } else { // SRAI8
 *     Rd.B[x] = SE8(Rs1.B[x][7:sa])
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
/*
 * Statement-expression wrapper around the `srai8.u` instruction.
 *   a - value operand; converted to unsigned long and passed in a register.
 *   b - shift amount; must satisfy the "K" operand constraint, i.e. it has to
 *       be a compile-time immediate.
 * The macro evaluates to the unsigned long left in the destination operand.
 *
 * `__a` is initialised before the result variable is declared, so an argument
 * expression that mentions a caller variable named `result`
 * (e.g. `result = __RV_SRAI8_U(result, 1)`) refers to the caller's variable
 * and not to an uninitialised local of this macro.
 */
#define __RV_SRAI8_U(a, b)    \
    ({    \
        unsigned long __a = (unsigned long)(a);    \
        unsigned long __res;    \
        __ASM volatile("srai8.u %0, %1, %2" : "=r"(__res) : "r"(__a), "K"(b));    \
        __res;    \
    })
/* ===== Inline Function End for 3.127.2. SRAI8.u ===== */

/* ===== Inline Function Start for 3.128.1. SRA16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
 * \brief SRA16 (SIMD 16-bit Shift Right Arithmetic)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SRA16 Rd, Rs1, Rs2
 * SRA16.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit element arithmetic right shift operations simultaneously. The shift amount is a
 * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
 * results.
 *
 * **Description**:\n
 * The 16-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
 * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order
 * 4-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is
 * added to the most significant discarded bit of each 16-bit data element to calculate the final results.
 * And the results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = Rs2[3:0];
 * if (sa != 0) {
 *   if (`.u` form) { // SRA16.u
 *     res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1;
 *     Rd.H[x] = res[15:0];
 *   } else { // SRA16
 *     Rd.H[x] = SE16(Rs1.H[x][15:sa])
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SRA16(unsigned long a, unsigned long b)
{
    /* Destination of `sra16`; `a` is the value operand, `b` the register shift amount. */
    unsigned long shifted;

    __ASM volatile(
        "sra16 %0, %1, %2"
        : "=r"(shifted)
        : "r"(a), "r"(b));

    return shifted;
}
/* ===== Inline Function End for 3.128.1. SRA16 ===== */

/* ===== Inline Function Start for 3.128.2. SRA16.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
 * \brief SRA16.u (SIMD 16-bit Rounding Shift Right Arithmetic)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SRA16 Rd, Rs1, Rs2
 * SRA16.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit element arithmetic right shift operations simultaneously. The shift amount is a
 * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
 * results.
 *
 * **Description**:\n
 * The 16-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
 * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order
 * 4-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is
 * added to the most significant discarded bit of each 16-bit data element to calculate the final results.
 * And the results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = Rs2[3:0];
 * if (sa != 0) {
 *   if (`.u` form) { // SRA16.u
 *     res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1;
 *     Rd.H[x] = res[15:0];
 *   } else { // SRA16
 *     Rd.H[x] = SE16(Rs1.H[x][15:sa])
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SRA16_U(unsigned long a, unsigned long b)
{
    /* Destination of `sra16.u`; `a` is the value operand, `b` the register shift amount. */
    unsigned long shifted;

    __ASM volatile(
        "sra16.u %0, %1, %2"
        : "=r"(shifted)
        : "r"(a), "r"(b));

    return shifted;
}
/* ===== Inline Function End for 3.128.2. SRA16.u ===== */

/* ===== Inline Function Start for 3.129.1. SRAI16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
 * \brief SRAI16 (SIMD 16-bit Shift Right Arithmetic Immediate)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SRAI16 Rd, Rs1, imm4u
 * SRAI16.u Rd, Rs1, imm4u
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit elements arithmetic right shift operations simultaneously. The shift amount is
 * an immediate value. The `.u` form performs additional rounding up operations on the shifted
 * results.
 *
 * **Description**:\n
 * The 16-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
 * bits are filled with the sign-bit of the 16-bit data elements. The shift amount is specified by the
 * imm4u constant. For the rounding operation of the `.u` form, a value of 1 is added to the most
 * significant discarded bit of each 16-bit data to calculate the final results. And the results are written
 * to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = imm4u[3:0];
 * if (sa > 0) {
 *   if (`.u` form) { // SRAI16.u
 *     res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1;
 *     Rd.H[x] = res[15:0];
 *   } else { // SRAI16
 *     Rd.H[x] = SE16(Rs1.H[x][15:sa]);
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
#define __RV_SRAI16(a, b)    \
    ({    \
        /* Evaluate `a` before `result` is declared, so that a caller */    \
        /* argument that is itself named `result` is not shadowed here. */    \
        unsigned long __a = (unsigned long)(a);    \
        unsigned long result;    \
        /* `b` is passed through the "K" (immediate) constraint, so it */    \
        /* must be a compile-time constant. */    \
        __ASM volatile("srai16 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b));    \
        result;    \
    })
/* ===== Inline Function End for 3.129.1. SRAI16 ===== */

/* ===== Inline Function Start for 3.129.2. SRAI16.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
 * \brief SRAI16.u (SIMD 16-bit Rounding Shift Right Arithmetic Immediate)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SRAI16 Rd, Rs1, imm4u
 * SRAI16.u Rd, Rs1, imm4u
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit elements arithmetic right shift operations simultaneously. The shift amount is
 * an immediate value. The `.u` form performs additional rounding up operations on the shifted
 * results.
 *
 * **Description**:\n
 * The 16-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
 * bits are filled with the sign-bit of the 16-bit data elements. The shift amount is specified by the
 * imm4u constant. For the rounding operation of the `.u` form, a value of 1 is added to the most
 * significant discarded bit of each 16-bit data to calculate the final results. And the results are written
 * to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = imm4u[3:0];
 * if (sa > 0) {
 *   if (`.u` form) { // SRAI16.u
 *     res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1;
 *     Rd.H[x] = res[15:0];
 *   } else { // SRAI16
 *     Rd.H[x] = SE16(Rs1.H[x][15:sa]);
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
#define __RV_SRAI16_U(a, b)    \
    ({    \
        /* Evaluate `a` before `result` is declared, so that a caller */    \
        /* argument that is itself named `result` is not shadowed here. */    \
        unsigned long __a = (unsigned long)(a);    \
        unsigned long result;    \
        /* `b` is passed through the "K" (immediate) constraint, so it */    \
        /* must be a compile-time constant. */    \
        __ASM volatile("srai16.u %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b));    \
        result;    \
    })
/* ===== Inline Function End for 3.129.2. SRAI16.u ===== */

/* ===== Inline Function Start for 3.130.1. SRL8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
 * \brief SRL8 (SIMD 8-bit Shift Right Logical)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SRL8 Rt, Ra, Rb
 * SRL8.u Rt, Ra, Rb
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit elements logical right shift operations simultaneously. The shift amount is a
 * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
 * results.
 *
 * **Description**:\n
 * The 8-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits are
 * filled with zero. The shift amount is specified by the low-order 3-bits of the value in the Rs2 register.
 * For the rounding operation of the `.u` form, a value of 1 is added to the most significant discarded
 * bit of each 8-bit data element to calculate the final results. And the results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = Rs2[2:0];
 * if (sa > 0) {
 *   if (`.u` form) { // SRL8.u
 *     res[8:0] = ZE9(Rs1.B[x][7:sa-1]) + 1;
 *     Rd.B[x] = res[8:1];
 *   } else { // SRL8
 *     Rd.B[x] = ZE8(Rs1.B[x][7:sa]);
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SRL8(unsigned long a, unsigned int b)
{
    /* Issue one "srl8": %1 = a, %2 = b; the value written to %0 is returned. */
    unsigned long rd;

    __ASM volatile(
        "srl8 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.130.1. SRL8 ===== */

/* ===== Inline Function Start for 3.130.2. SRL8.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
 * \brief SRL8.u (SIMD 8-bit Rounding Shift Right Logical)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SRL8 Rt, Ra, Rb
 * SRL8.u Rt, Ra, Rb
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit elements logical right shift operations simultaneously. The shift amount is a
 * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
 * results.
 *
 * **Description**:\n
 * The 8-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits are
 * filled with zero. The shift amount is specified by the low-order 3-bits of the value in the Rs2 register.
 * For the rounding operation of the `.u` form, a value of 1 is added to the most significant discarded
 * bit of each 8-bit data element to calculate the final results. And the results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = Rs2[2:0];
 * if (sa > 0) {
 *   if (`.u` form) { // SRL8.u
 *     res[8:0] = ZE9(Rs1.B[x][7:sa-1]) + 1;
 *     Rd.B[x] = res[8:1];
 *   } else { // SRL8
 *     Rd.B[x] = ZE8(Rs1.B[x][7:sa]);
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SRL8_U(unsigned long a, unsigned int b)
{
    /* Issue one "srl8.u": %1 = a, %2 = b; the value written to %0 is returned. */
    unsigned long rd;

    __ASM volatile(
        "srl8.u %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.130.2. SRL8.u ===== */

/* ===== Inline Function Start for 3.131.1. SRLI8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
 * \brief SRLI8 (SIMD 8-bit Shift Right Logical Immediate)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SRLI8 Rt, Ra, imm3u
 * SRLI8.u Rt, Ra, imm3u
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit elements logical right shift operations simultaneously. The shift amount is an
 * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
 *
 * **Description**:\n
 * The 8-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits are
 * filled with zero. The shift amount is specified by the imm3u constant. For the rounding operation of
 * the `.u` form, a value of 1 is added to the most significant discarded bit of each 8-bit data element to
 * calculate the final results. And the results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = imm3u[2:0];
 * if (sa > 0) {
 *   if (`.u` form) { // SRLI8.u
 *     res[8:0] = ZE9(Rs1.B[x][7:sa-1]) + 1;
 *     Rd.B[x] = res[8:1];
 *   } else { // SRLI8
 *     Rd.B[x] = ZE8(Rs1.B[x][7:sa]);
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
#define __RV_SRLI8(a, b)    \
    ({    \
        /* Evaluate `a` before `result` is declared, so that a caller */    \
        /* argument that is itself named `result` is not shadowed here. */    \
        unsigned long __a = (unsigned long)(a);    \
        unsigned long result;    \
        /* `b` is passed through the "K" (immediate) constraint, so it */    \
        /* must be a compile-time constant. */    \
        __ASM volatile("srli8 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b));    \
        result;    \
    })
/* ===== Inline Function End for 3.131.1. SRLI8 ===== */

/* ===== Inline Function Start for 3.131.2. SRLI8.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
 * \brief SRLI8.u (SIMD 8-bit Rounding Shift Right Logical Immediate)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SRLI8 Rt, Ra, imm3u
 * SRLI8.u Rt, Ra, imm3u
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit elements logical right shift operations simultaneously. The shift amount is an
 * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
 *
 * **Description**:\n
 * The 8-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits are
 * filled with zero. The shift amount is specified by the imm3u constant. For the rounding operation of
 * the `.u` form, a value of 1 is added to the most significant discarded bit of each 8-bit data element to
 * calculate the final results. And the results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = imm3u[2:0];
 * if (sa > 0) {
 *   if (`.u` form) { // SRLI8.u
 *     res[8:0] = ZE9(Rs1.B[x][7:sa-1]) + 1;
 *     Rd.B[x] = res[8:1];
 *   } else { // SRLI8
 *     Rd.B[x] = ZE8(Rs1.B[x][7:sa]);
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
#define __RV_SRLI8_U(a, b)    \
    ({    \
        /* Evaluate `a` before `result` is declared, so that a caller */    \
        /* argument that is itself named `result` is not shadowed here. */    \
        unsigned long __a = (unsigned long)(a);    \
        unsigned long result;    \
        /* `b` is passed through the "K" (immediate) constraint, so it */    \
        /* must be a compile-time constant. */    \
        __ASM volatile("srli8.u %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b));    \
        result;    \
    })
/* ===== Inline Function End for 3.131.2. SRLI8.u ===== */

/* ===== Inline Function Start for 3.132.1. SRL16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
 * \brief SRL16 (SIMD 16-bit Shift Right Logical)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SRL16 Rt, Ra, Rb
 * SRL16.u Rt, Ra, Rb
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit elements logical right shift operations simultaneously. The shift amount is a
 * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
 * results.
 *
 * **Description**:\n
 * The 16-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
 * are filled with zero. The shift amount is specified by the low-order 4-bits of the value in the Rs2
 * register. For the rounding operation of the `.u` form, a value of 1 is added to the most significant
 * discarded bit of each 16-bit data element to calculate the final results. And the results are written to
 * Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = Rs2[3:0];
 * if (sa > 0) {
 *   if (`.u` form) { // SRL16.u
 *     res[16:0] = ZE17(Rs1.H[x][15:sa-1]) + 1;
 *     Rd.H[x] = res[16:1];
 *   } else { // SRL16
 *     Rd.H[x] = ZE16(Rs1.H[x][15:sa]);
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SRL16(unsigned long a, unsigned int b)
{
    /* Issue one "srl16": %1 = a, %2 = b; the value written to %0 is returned. */
    unsigned long rd;

    __ASM volatile(
        "srl16 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.132.1. SRL16 ===== */

/* ===== Inline Function Start for 3.132.2. SRL16.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
 * \brief SRL16.u (SIMD 16-bit Rounding Shift Right Logical)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SRL16 Rt, Ra, Rb
 * SRL16.u Rt, Ra, Rb
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit elements logical right shift operations simultaneously. The shift amount is a
 * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
 * results.
 *
 * **Description**:\n
 * The 16-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
 * are filled with zero. The shift amount is specified by the low-order 4-bits of the value in the Rs2
 * register. For the rounding operation of the `.u` form, a value of 1 is added to the most significant
 * discarded bit of each 16-bit data element to calculate the final results. And the results are written to
 * Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = Rs2[3:0];
 * if (sa > 0) {
 *   if (`.u` form) { // SRL16.u
 *     res[16:0] = ZE17(Rs1.H[x][15:sa-1]) + 1;
 *     Rd.H[x] = res[16:1];
 *   } else { // SRL16
 *     Rd.H[x] = ZE16(Rs1.H[x][15:sa]);
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SRL16_U(unsigned long a, unsigned int b)
{
    /* Issue one "srl16.u": %1 = a, %2 = b; the value written to %0 is returned. */
    unsigned long rd;

    __ASM volatile(
        "srl16.u %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.132.2. SRL16.u ===== */

/* ===== Inline Function Start for 3.133.1. SRLI16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
 * \brief SRLI16 (SIMD 16-bit Shift Right Logical Immediate)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SRLI16 Rt, Ra, imm4u
 * SRLI16.u Rt, Ra, imm4u
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit elements logical right shift operations simultaneously. The shift amount is an
 * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
 *
 * **Description**:\n
 * The 16-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
 * are filled with zero. The shift amount is specified by the imm4u constant. For the rounding
 * operation of the `.u` form, a value of 1 is added to the most significant discarded bit of each 16-bit
 * data element to calculate the final results. And the results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = imm4u;
 * if (sa > 0) {
 *   if (`.u` form) { // SRLI16.u
 *     res[16:0] = ZE17(Rs1.H[x][15:sa-1]) + 1;
 *     Rd.H[x] = res[16:1];
 *   } else { // SRLI16
 *     Rd.H[x] = ZE16(Rs1.H[x][15:sa]);
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
#define __RV_SRLI16(a, b)    \
    ({    \
        /* Evaluate `a` before `result` is declared, so that a caller */    \
        /* argument that is itself named `result` is not shadowed here. */    \
        unsigned long __a = (unsigned long)(a);    \
        unsigned long result;    \
        /* `b` is passed through the "K" (immediate) constraint, so it */    \
        /* must be a compile-time constant. */    \
        __ASM volatile("srli16 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b));    \
        result;    \
    })
/* ===== Inline Function End for 3.133.1. SRLI16 ===== */

/* ===== Inline Function Start for 3.133.2. SRLI16.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
 * \brief SRLI16.u (SIMD 16-bit Rounding Shift Right Logical Immediate)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SRLI16 Rt, Ra, imm4u
 * SRLI16.u Rt, Ra, imm4u
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit elements logical right shift operations simultaneously. The shift amount is an
 * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
 *
 * **Description**:\n
 * The 16-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
 * are filled with zero. The shift amount is specified by the imm4u constant. For the rounding
 * operation of the `.u` form, a value of 1 is added to the most significant discarded bit of each 16-bit
 * data element to calculate the final results. And the results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = imm4u;
 * if (sa > 0) {
 *   if (`.u` form) { // SRLI16.u
 *     res[16:0] = ZE17(Rs1.H[x][15:sa-1]) + 1;
 *     Rd.H[x] = res[16:1];
 *   } else { // SRLI16
 *     Rd.H[x] = ZE16(Rs1.H[x][15:sa]);
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
#define __RV_SRLI16_U(a, b)    \
    ({    \
        /* Evaluate `a` before `result` is declared, so that a caller */    \
        /* argument that is itself named `result` is not shadowed here. */    \
        unsigned long __a = (unsigned long)(a);    \
        unsigned long result;    \
        /* `b` is passed through the "K" (immediate) constraint, so it */    \
        /* must be a compile-time constant. */    \
        __ASM volatile("srli16.u %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b));    \
        result;    \
    })
/* ===== Inline Function End for 3.133.2. SRLI16.u ===== */

/* ===== Inline Function Start for 3.134. STAS16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief STAS16 (SIMD 16-bit Straight Addition & Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * STAS16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit integer element addition and 16-bit integer element subtraction in a 32-bit
 * chunk simultaneously. Operands are from corresponding positions in 32-bit chunks.
 *
 * **Description**:\n
 * This instruction adds the 16-bit integer element in [31:16] of 32-bit chunks in Rs1 with
 * the 16-bit integer element in [31:16] of 32-bit chunks in Rs2, and writes the result to [31:16] of 32-bit
 * chunks in Rd; at the same time, it subtracts the 16-bit integer element in [15:0] of 32-bit chunks in
 * Rs2 from the 16-bit integer element in [15:0] of 32-bit chunks in Rs1, and writes the result to
 * [15:0] of 32-bit chunks in Rd.
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned operations.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:16] = Rs1.W[x][31:16] + Rs2.W[x][31:16];
 * Rd.W[x][15:0] = Rs1.W[x][15:0] - Rs2.W[x][15:0];
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_STAS16(unsigned long a, unsigned long b)
{
    /* Issue one "stas16": %1 = a, %2 = b; the value written to %0 is returned. */
    unsigned long rd;

    __ASM volatile(
        "stas16 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.134. STAS16 ===== */

/* ===== Inline Function Start for 3.135. STSA16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief STSA16 (SIMD 16-bit Straight Subtraction & Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * STSA16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit integer element subtraction and 16-bit integer element addition in a 32-bit
 * chunk simultaneously. Operands are from corresponding positions in 32-bit chunks.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit integer element in [31:16] of 32-bit chunks in Rs2
 * from the 16-bit integer element in [31:16] of 32-bit chunks in Rs1, and writes the result to [31:16] of
 * 32-bit chunks in Rd; at the same time, it adds the 16-bit integer element in [15:0] of 32-bit chunks in
 * Rs2 with the 16-bit integer element in [15:0] of 32-bit chunks in Rs1, and writes the result to [15:0] of
 * 32-bit chunks in Rd.
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned operations.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:16] = Rs1.W[x][31:16] - Rs2.W[x][31:16];
 * Rd.W[x][15:0] = Rs1.W[x][15:0] + Rs2.W[x][15:0];
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_STSA16(unsigned long a, unsigned long b)
{
    /* Issue one "stsa16": %1 = a, %2 = b; the value written to %0 is returned. */
    unsigned long rd;

    __ASM volatile(
        "stsa16 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.135. STSA16 ===== */

/* ===== Inline Function Start for 3.136. SUB8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
 * \brief SUB8 (SIMD 8-bit Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SUB8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit integer element subtractions simultaneously.
 *
 * **Description**:\n
 * This instruction subtracts the 8-bit integer elements in Rs2 from the 8-bit integer
 * elements in Rs1, and then writes the result to Rd.
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned subtraction.
 *
 * **Operations**:\n
 * ~~~
 * Rd.B[x] = Rs1.B[x] - Rs2.B[x];
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SUB8(unsigned long a, unsigned long b)
{
    /* Issue one "sub8": %1 = a, %2 = b; the value written to %0 is returned. */
    unsigned long rd;

    __ASM volatile(
        "sub8 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.136. SUB8 ===== */

/* ===== Inline Function Start for 3.137. SUB16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief SUB16 (SIMD 16-bit Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SUB16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit integer element subtractions simultaneously.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit integer elements in Rs2 from the 16-bit integer
 * elements in Rs1, and then writes the result to Rd.
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned subtraction.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = Rs1.H[x] - Rs2.H[x];
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SUB16(unsigned long a, unsigned long b)
{
    /* Issue one "sub16": %1 = a, %2 = b; the value written to %0 is returned. */
    unsigned long rd;

    __ASM volatile(
        "sub16 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.137. SUB16 ===== */

/* ===== Inline Function Start for 3.138. SUB64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
 * \brief SUB64 (64-bit Subtraction)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * SUB64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Perform a 64-bit signed or unsigned integer subtraction.
 *
 * **RV32 Description**:\n
 * This instruction subtracts the 64-bit integer of an even/odd pair of registers
 * specified by Rs2(4,1) from the 64-bit integer of an even/odd pair of registers specified by Rs1(4,1),
 * and then writes the 64-bit result to an even/odd pair of registers specified by Rd(4,1).
 * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
 * register of the pair contains the low 32-bit of the operand.
 *
 * **RV64 Description**:\n
 * This instruction subtracts the 64-bit integer of Rs2 from the 64-bit integer of Rs1,
 * and then writes the 64-bit result to Rd.
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned subtraction.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
 * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
 * R[t_H].R[t_L] = R[a_H].R[a_L] - R[b_H].R[b_L];
 * * RV64:
 * Rd = Rs1 - Rs2;
 * ~~~
 *
 * \param [in]  a    unsigned long long type of value stored in a
 * \param [in]  b    unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_SUB64(unsigned long long a, unsigned long long b)
{
    /* Issue one "sub64": %1 = a, %2 = b; the 64-bit value written to %0 is returned. */
    unsigned long long rd;

    __ASM volatile(
        "sub64 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.138. SUB64 ===== */

/* ===== Inline Function Start for 3.139.1. SUNPKD810 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
 * \brief SUNPKD810 (Signed Unpacking Bytes 1 & 0)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * SUNPKD8xy Rd, Rs1
 * xy = {10, 20, 30, 31, 32}
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte *x and byte y* of 32-bit chunks in a register into two 16-bit signed halfwords
 * of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `SUNPKD8(x)(*y*)` instruction, it unpacks byte *x and byte y* of 32-bit chunks in Rs1 into
 * two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit
 * chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = SE16(Rs1.W[m].B[x])
 * Rd.W[m].H[0] = SE16(Rs1.W[m].B[y])
 * // SUNPKD810, x=1,y=0
 * // SUNPKD820, x=2,y=0
 * // SUNPKD830, x=3,y=0
 * // SUNPKD831, x=3,y=1
 * // SUNPKD832, x=3,y=2
 * for RV32: m=0,
 * for RV64: m=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SUNPKD810(unsigned long a)
{
    /* Issue one "sunpkd810": %1 = a; the value written to %0 is returned. */
    unsigned long rd;

    __ASM volatile(
        "sunpkd810 %0, %1"
        : "=r"(rd)
        : "r"(a));
    return rd;
}
/* ===== Inline Function End for 3.139.1. SUNPKD810 ===== */

/* ===== Inline Function Start for 3.139.2. SUNPKD820 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
 * \brief SUNPKD820 (Signed Unpacking Bytes 2 & 0)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * SUNPKD8xy Rd, Rs1
 * xy = {10, 20, 30, 31, 32}
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte *x and byte y* of 32-bit chunks in a register into two 16-bit signed halfwords
 * of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `SUNPKD8(x)(*y*)` instruction, it unpacks byte *x and byte y* of 32-bit chunks in Rs1 into
 * two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit
 * chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = SE16(Rs1.W[m].B[x])
 * Rd.W[m].H[0] = SE16(Rs1.W[m].B[y])
 * // SUNPKD810, x=1,y=0
 * // SUNPKD820, x=2,y=0
 * // SUNPKD830, x=3,y=0
 * // SUNPKD831, x=3,y=1
 * // SUNPKD832, x=3,y=2
 * for RV32: m=0,
 * for RV64: m=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SUNPKD820(unsigned long a)
{
    /* Issue one "sunpkd820": %1 = a; the value written to %0 is returned. */
    unsigned long rd;

    __ASM volatile(
        "sunpkd820 %0, %1"
        : "=r"(rd)
        : "r"(a));
    return rd;
}
/* ===== Inline Function End for 3.139.2. SUNPKD820 ===== */

/* ===== Inline Function Start for 3.139.3. SUNPKD830 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
 * \brief SUNPKD830 (Signed Unpacking Bytes 3 & 0)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * SUNPKD8xy Rd, Rs1
 * xy = {10, 20, 30, 31, 32}
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte *x and byte y* of 32-bit chunks in a register into two 16-bit signed halfwords
 * of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `SUNPKD8(x)(*y*)` instruction, it unpacks byte *x and byte y* of 32-bit chunks in Rs1 into
 * two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit
 * chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = SE16(Rs1.W[m].B[x])
 * Rd.W[m].H[0] = SE16(Rs1.W[m].B[y])
 * // SUNPKD810, x=1,y=0
 * // SUNPKD820, x=2,y=0
 * // SUNPKD830, x=3,y=0
 * // SUNPKD831, x=3,y=1
 * // SUNPKD832, x=3,y=2
 * for RV32: m=0,
 * for RV64: m=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SUNPKD830(unsigned long a)
{
    /* Issue one "sunpkd830": %1 = a; the value written to %0 is returned. */
    unsigned long rd;

    __ASM volatile(
        "sunpkd830 %0, %1"
        : "=r"(rd)
        : "r"(a));
    return rd;
}
/* ===== Inline Function End for 3.139.3. SUNPKD830 ===== */

/* ===== Inline Function Start for 3.139.4. SUNPKD831 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
 * \brief SUNPKD831 (Signed Unpacking Bytes 3 & 1)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * SUNPKD8xy Rd, Rs1
 * xy = {10, 20, 30, 31, 32}
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte *x and byte y* of 32-bit chunks in a register into two 16-bit signed halfwords
 * of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `SUNPKD8(x)(*y*)` instruction, it unpacks byte *x and byte y* of 32-bit chunks in Rs1 into
 * two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit
 * chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = SE16(Rs1.W[m].B[x])
 * Rd.W[m].H[0] = SE16(Rs1.W[m].B[y])
 * // SUNPKD810, x=1,y=0
 * // SUNPKD820, x=2,y=0
 * // SUNPKD830, x=3,y=0
 * // SUNPKD831, x=3,y=1
 * // SUNPKD832, x=3,y=2
 * for RV32: m=0,
 * for RV64: m=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SUNPKD831(unsigned long a)
{
    /* Issue one "sunpkd831": %1 = a; the value written to %0 is returned. */
    unsigned long rd;

    __ASM volatile(
        "sunpkd831 %0, %1"
        : "=r"(rd)
        : "r"(a));
    return rd;
}
/* ===== Inline Function End for 3.139.4. SUNPKD831 ===== */

/* ===== Inline Function Start for 3.139.5. SUNPKD832 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
 * \brief SUNPKD832 (Signed Unpacking Bytes 3 & 2)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * SUNPKD8xy Rd, Rs1
 * xy = {10, 20, 30, 31, 32}
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte *x and byte y* of 32-bit chunks in a register into two 16-bit signed halfwords
 * of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `SUNPKD8(x)(*y*)` instruction, it unpacks byte *x and byte y* of 32-bit chunks in Rs1 into
 * two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit
 * chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = SE16(Rs1.W[m].B[x])
 * Rd.W[m].H[0] = SE16(Rs1.W[m].B[y])
 * // SUNPKD810, x=1,y=0
 * // SUNPKD820, x=2,y=0
 * // SUNPKD830, x=3,y=0
 * // SUNPKD831, x=3,y=1
 * // SUNPKD832, x=3,y=2
 * for RV32: m=0,
 * for RV64: m=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SUNPKD832(unsigned long a)
{
    /* Issue one "sunpkd832": %1 = a; the value written to %0 is returned. */
    unsigned long rd;

    __ASM volatile(
        "sunpkd832 %0, %1"
        : "=r"(rd)
        : "r"(a));
    return rd;
}
/* ===== Inline Function End for 3.139.5. SUNPKD832 ===== */

/* ===== Inline Function Start for 3.140. SWAP8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
 * \brief SWAP8 (Swap Byte within Halfword)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * SWAP8 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Swap the bytes within each halfword of a register.
 *
 * **Description**:\n
 * This instruction swaps the bytes within each halfword of Rs1 and writes the result to
 * Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = CONCAT(Rs1.H[x][7:0],Rs1.H[x][15:8]);
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SWAP8(unsigned long a)
{
    /* Issue one "swap8": %1 = a; the value written to %0 is returned. */
    unsigned long rd;

    __ASM volatile(
        "swap8 %0, %1"
        : "=r"(rd)
        : "r"(a));
    return rd;
}
/* ===== Inline Function End for 3.140. SWAP8 ===== */

/* ===== Inline Function Start for 3.141. SWAP16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
 * \brief SWAP16 (Swap Halfword within Word)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * SWAP16 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Swap the 16-bit halfwords within each word of a register.
 *
 * **Description**:\n
 * This instruction swaps the 16-bit halfwords within each word of Rs1 and writes the
 * result to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = CONCAT(Rs1.W[x][15:0],Rs1.W[x][31:16]);
 * for RV32: x=0,
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SWAP16(unsigned long a)
{
    /* Issue one "swap16": %1 = a; the value written to %0 is returned. */
    unsigned long rd;

    __ASM volatile(
        "swap16 %0, %1"
        : "=r"(rd)
        : "r"(a));
    return rd;
}
/* ===== Inline Function End for 3.141. SWAP16 ===== */

/* ===== Inline Function Start for 3.142. UCLIP8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
 * \brief UCLIP8 (SIMD 8-bit Unsigned Clip Value)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UCLIP8 Rt, Ra, imm3u
 * ~~~
 *
 * **Purpose**:\n
 * Limit the 8-bit signed elements of a register into an unsigned range simultaneously.
 *
 * **Description**:\n
 * This instruction limits the 8-bit signed elements stored in Rs1 into an unsigned integer
 * range between 2^imm3u-1 and 0, and writes the limited results to Rd. For example, if imm3u is 3, the 8-
 * bit input values should be saturated between 7 and 0. If saturation is performed, set OV bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * src = Rs1.B[x];
 * if (src > (2^imm3u)-1) {
 *   src = (2^imm3u)-1;
 *   OV = 1;
 * } else if (src < 0) {
 *   src = 0;
 *   OV = 1;
 * }
 * Rd.B[x] = src;
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
/*
 * Implemented as a macro (GNU statement expression) because the clip width
 * `b` is bound with the "K" immediate constraint and therefore has to be a
 * constant expression at the point of expansion.
 *
 * The argument `a` is captured BEFORE the output temporary is declared, and
 * both temporaries use macro-specific names. With the generic names
 * `result` / `__a`, a call such as `__RV_UCLIP8(result, 3)` would bind the
 * argument to the macro's own, still uninitialised, local instead of the
 * caller's variable.
 */
#define __RV_UCLIP8(a, b)    \
    ({    \
        unsigned long __uclip8_rs1 = (unsigned long)(a);    \
        unsigned long __uclip8_rd;    \
        __ASM volatile("uclip8 %0, %1, %2" : "=r"(__uclip8_rd) : "r"(__uclip8_rs1), "K"(b));    \
        __uclip8_rd;    \
    })
/* ===== Inline Function End for 3.142. UCLIP8 ===== */

/* ===== Inline Function Start for 3.143. UCLIP16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
 * \brief UCLIP16 (SIMD 16-bit Unsigned Clip Value)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UCLIP16 Rd, Rs1, imm4u
 * ~~~
 *
 * **Purpose**:\n
 * Limit the 16-bit signed elements of a register into an unsigned range simultaneously.
 *
 * **Description**:\n
 * This instruction limits the 16-bit signed elements stored in Rs1 into an unsigned
 * integer range between 2^imm4u-1 and 0, and writes the limited results to Rd. For example, if imm4u is
 * 3, the 16-bit input values should be saturated between 7 and 0. If saturation is performed, set OV bit
 * to 1.
 *
 * **Operations**:\n
 * ~~~
 * src = Rs1.H[x];
 * if (src > (2^imm4u)-1) {
 *   src = (2^imm4u)-1;
 *   OV = 1;
 * } else if (src < 0) {
 *   src = 0;
 *   OV = 1;
 * }
 * Rd.H[x] = src;
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
/*
 * Implemented as a macro (GNU statement expression) because the clip width
 * `b` is bound with the "K" immediate constraint and therefore has to be a
 * constant expression at the point of expansion.
 *
 * The argument `a` is captured BEFORE the output temporary is declared, and
 * both temporaries use macro-specific names. With the generic names
 * `result` / `__a`, a call such as `__RV_UCLIP16(result, 3)` would bind the
 * argument to the macro's own, still uninitialised, local instead of the
 * caller's variable.
 */
#define __RV_UCLIP16(a, b)    \
    ({    \
        unsigned long __uclip16_rs1 = (unsigned long)(a);    \
        unsigned long __uclip16_rd;    \
        __ASM volatile("uclip16 %0, %1, %2" : "=r"(__uclip16_rd) : "r"(__uclip16_rs1), "K"(b));    \
        __uclip16_rd;    \
    })
/* ===== Inline Function End for 3.143. UCLIP16 ===== */

/* ===== Inline Function Start for 3.144. UCLIP32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
 * \brief UCLIP32 (SIMD 32-bit Unsigned Clip Value)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UCLIP32 Rd, Rs1, imm5u[4:0]
 * ~~~
 *
 * **Purpose**:\n
 * Limit the 32-bit signed integer elements of a register into an unsigned range
 * simultaneously.
 *
 * **Description**:\n
 * This instruction limits the 32-bit signed integer elements stored in Rs1 into an
 * unsigned integer range between 2^imm5u-1 and 0, and writes the limited results to Rd. For example, if
 * imm5u is 3, the 32-bit input values should be saturated between 7 and 0. If saturation is performed,
 * set OV bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * src = Rs1.W[x];
 * if (src > (2^imm5u)-1) {
 *   src = (2^imm5u)-1;
 *   OV = 1;
 * } else if (src < 0) {
 *   src = 0;
 *   OV = 1;
 * }
 * Rd.W[x] = src
 * for RV32: x=0,
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
/*
 * Implemented as a macro (GNU statement expression) because the clip width
 * `b` is bound with the "K" immediate constraint and therefore has to be a
 * constant expression at the point of expansion.
 *
 * The argument `a` is captured BEFORE the output temporary is declared, and
 * both temporaries use macro-specific names. With the generic names
 * `result` / `__a`, a call such as `__RV_UCLIP32(result, 3)` would bind the
 * argument to the macro's own, still uninitialised, local instead of the
 * caller's variable.
 */
#define __RV_UCLIP32(a, b)    \
    ({    \
        unsigned long __uclip32_rs1 = (unsigned long)(a);    \
        unsigned long __uclip32_rd;    \
        __ASM volatile("uclip32 %0, %1, %2" : "=r"(__uclip32_rd) : "r"(__uclip32_rs1), "K"(b));    \
        __uclip32_rd;    \
    })
/* ===== Inline Function End for 3.144. UCLIP32 ===== */

/* ===== Inline Function Start for 3.145. UCMPLE8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP
 * \brief UCMPLE8 (SIMD 8-bit Unsigned Compare Less Than & Equal)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UCMPLE8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit unsigned integer elements less than & equal comparisons simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 8-bit unsigned integer elements in Rs1 with the 8-bit
 * unsigned integer elements in Rs2 to see if the one in Rs1 is less than or equal to the one in Rs2. If it
 * is true, the result is 0xFF; otherwise, the result is 0x0. The element comparison results are
 * written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.B[x] = (Rs1.B[x] <=u Rs2.B[x])? 0xff : 0x0;
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UCMPLE8(unsigned long a, unsigned long b)
{
    /* Per-byte comparison mask written by UCMPLE8. */
    unsigned long mask;

    __ASM volatile(
        "ucmple8 %0, %1, %2"
        : "=r"(mask)         /* %0: Rd       */
        : "r"(a), "r"(b)     /* %1/%2: Rs1, Rs2 */
    );

    return mask;
}
/* ===== Inline Function End for 3.145. UCMPLE8 ===== */

/* ===== Inline Function Start for 3.146. UCMPLE16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP
 * \brief UCMPLE16 (SIMD 16-bit Unsigned Compare Less Than & Equal)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UCMPLE16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit unsigned integer elements less than & equal comparisons simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 16-bit unsigned integer elements in Rs1 with the 16-bit
 * unsigned integer elements in Rs2 to see if the one in Rs1 is less than or equal to the one in Rs2. If it
 * is true, the result is 0xFFFF; otherwise, the result is 0x0. The element comparison results are
 * written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = (Rs1.H[x] <=u Rs2.H[x])? 0xffff : 0x0;
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UCMPLE16(unsigned long a, unsigned long b)
{
    /* Per-halfword comparison mask written by UCMPLE16. */
    unsigned long mask;

    __ASM volatile(
        "ucmple16 %0, %1, %2"
        : "=r"(mask)         /* %0: Rd       */
        : "r"(a), "r"(b)     /* %1/%2: Rs1, Rs2 */
    );

    return mask;
}
/* ===== Inline Function End for 3.146. UCMPLE16 ===== */

/* ===== Inline Function Start for 3.147. UCMPLT8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP
 * \brief UCMPLT8 (SIMD 8-bit Unsigned Compare Less Than)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UCMPLT8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit unsigned integer elements less than comparisons simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 8-bit unsigned integer elements in Rs1 with the 8-bit
 * unsigned integer elements in Rs2 to see if the one in Rs1 is less than the one in Rs2. If it is true, the
 * result is 0xFF; otherwise, the result is 0x0. The element comparison results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.B[x] = (Rs1.B[x] <u Rs2.B[x])? 0xff : 0x0;
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UCMPLT8(unsigned long a, unsigned long b)
{
    /* Per-byte comparison mask written by UCMPLT8. */
    unsigned long mask;

    __ASM volatile(
        "ucmplt8 %0, %1, %2"
        : "=r"(mask)         /* %0: Rd       */
        : "r"(a), "r"(b)     /* %1/%2: Rs1, Rs2 */
    );

    return mask;
}
/* ===== Inline Function End for 3.147. UCMPLT8 ===== */

/* ===== Inline Function Start for 3.148. UCMPLT16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP
 * \brief UCMPLT16 (SIMD 16-bit Unsigned Compare Less Than)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UCMPLT16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit unsigned integer elements less than comparisons simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 16-bit unsigned integer elements in Rs1 with the 16-bit
 * unsigned integer elements in Rs2 to see if the one in Rs1 is less than the one in Rs2. If it is true, the
 * result is 0xFFFF; otherwise, the result is 0x0. The element comparison results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = (Rs1.H[x] <u Rs2.H[x])? 0xffff : 0x0;
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UCMPLT16(unsigned long a, unsigned long b)
{
    /* Per-halfword comparison mask written by UCMPLT16. */
    unsigned long mask;

    __ASM volatile(
        "ucmplt16 %0, %1, %2"
        : "=r"(mask)         /* %0: Rd       */
        : "r"(a), "r"(b)     /* %1/%2: Rs1, Rs2 */
    );

    return mask;
}
/* ===== Inline Function End for 3.148. UCMPLT16 ===== */

/* ===== Inline Function Start for 3.149. UKADD8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
 * \brief UKADD8 (SIMD 8-bit Unsigned Saturating Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UKADD8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit unsigned integer element saturating additions simultaneously.
 *
 * **Description**:\n
 * This instruction adds the 8-bit unsigned integer elements in Rs1 with the 8-bit
 * unsigned integer elements in Rs2. If any of the results are beyond the 8-bit unsigned number range
 * (0 <= RES <= 2^8-1), they are saturated to the range and the OV bit is set to 1. The saturated results are
 * written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rs1.B[x] + Rs2.B[x];
 * if (res[x] > (2^8)-1) {
 *   res[x] = (2^8)-1;
 *   OV = 1;
 * }
 * Rd.B[x] = res[x];
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UKADD8(unsigned long a, unsigned long b)
{
    /* Packed 8-bit saturated sums written by UKADD8. */
    unsigned long sum;

    __ASM volatile(
        "ukadd8 %0, %1, %2"
        : "=r"(sum)          /* %0: Rd       */
        : "r"(a), "r"(b)     /* %1/%2: Rs1, Rs2 */
    );

    return sum;
}
/* ===== Inline Function End for 3.149. UKADD8 ===== */

/* ===== Inline Function Start for 3.150. UKADD16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief UKADD16 (SIMD 16-bit Unsigned Saturating Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UKADD16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit unsigned integer element saturating additions simultaneously.
 *
 * **Description**:\n
 * This instruction adds the 16-bit unsigned integer elements in Rs1 with the 16-bit
 * unsigned integer elements in Rs2. If any of the results are beyond the 16-bit unsigned number
 * range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set to 1. The saturated
 * results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rs1.H[x] + Rs2.H[x];
 * if (res[x] > (2^16)-1) {
 *   res[x] = (2^16)-1;
 *   OV = 1;
 * }
 * Rd.H[x] = res[x];
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UKADD16(unsigned long a, unsigned long b)
{
    /* Packed 16-bit saturated sums written by UKADD16. */
    unsigned long sum;

    __ASM volatile(
        "ukadd16 %0, %1, %2"
        : "=r"(sum)          /* %0: Rd       */
        : "r"(a), "r"(b)     /* %1/%2: Rs1, Rs2 */
    );

    return sum;
}
/* ===== Inline Function End for 3.150. UKADD16 ===== */

/* ===== Inline Function Start for 3.151. UKADD64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
 * \brief UKADD64 (64-bit Unsigned Saturating Addition)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * UKADD64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Add two 64-bit unsigned integers. The result is saturated to the U64 range.
 *
 * **RV32 Description**:\n
 * This instruction adds the 64-bit unsigned integer of an even/odd pair of registers
 * specified by Rs1(4,1) with the 64-bit unsigned integer of an even/odd pair of registers specified by
 * Rs2(4,1). If the 64-bit result is beyond the U64 number range (0 <= U64 <= 2^64-1), it is saturated to the
 * range and the OV bit is set to 1. The saturated result is written to an even/odd pair of registers
 * specified by Rd(4,1).
 * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
 * of the pair contains the low 32-bit of the result.
 *
 * **RV64 Description**:\n
 * This instruction adds the 64-bit unsigned integer in Rs1 with the 64-bit unsigned
 * integer in Rs2. If the 64-bit result is beyond the U64 number range (0 <= U64 <= 2^64-1), it is saturated to
 * the range and the OV bit is set to 1. The saturated result is written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
 * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
 * result = R[a_H].R[a_L] + R[b_H].R[b_L];
 * if (result > (2^64)-1) {
 *   result = (2^64)-1; OV = 1;
 * }
 * R[t_H].R[t_L] = result;
 * * RV64:
 * result = Rs1 + Rs2;
 * if (result > (2^64)-1) {
 *   result = (2^64)-1; OV = 1;
 * }
 * Rd = result;
 * ~~~
 *
 * \param [in]  a    unsigned long long type of value stored in a
 * \param [in]  b    unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_UKADD64(unsigned long long a, unsigned long long b)
{
    /* 64-bit saturated sum written by UKADD64. */
    unsigned long long sum;

    __ASM volatile(
        "ukadd64 %0, %1, %2"
        : "=r"(sum)          /* %0: Rd       */
        : "r"(a), "r"(b)     /* %1/%2: Rs1, Rs2 */
    );

    return sum;
}
/* ===== Inline Function End for 3.151. UKADD64 ===== */

/* ===== Inline Function Start for 3.152. UKADDH ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
 * \brief UKADDH (Unsigned Addition with U16 Saturation)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * UKADDH Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Add the unsigned lower 32-bit content of two registers with U16 saturation.
 *
 * **Description**:\n
 * The unsigned lower 32-bit content of Rs1 is added with the unsigned lower 32-bit
 * content of Rs2. And the result is saturated to the 16-bit unsigned integer range of [0, 2^16-1] and then
 * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag.
 *
 * **Operations**:\n
 * ~~~
 * tmp = Rs1.W[0] + Rs2.W[0];
 * if (tmp > (2^16)-1) {
 *   tmp = (2^16)-1;
 *   OV = 1;
 * }
 * Rd = SE(tmp[15:0]);
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UKADDH(unsigned int a, unsigned int b)
{
    /* Register-wide value written to Rd by UKADDH. */
    unsigned long rd;

    __ASM volatile(
        "ukaddh %0, %1, %2"
        : "=r"(rd)           /* %0: Rd       */
        : "r"(a), "r"(b)     /* %1/%2: Rs1, Rs2 */
    );

    return rd;
}
/* ===== Inline Function End for 3.152. UKADDH ===== */

/* ===== Inline Function Start for 3.153. UKADDW ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
 * \brief UKADDW (Unsigned Addition with U32 Saturation)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * UKADDW Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Add the unsigned lower 32-bit content of two registers with U32 saturation.
 *
 * **Description**:\n
 * The unsigned lower 32-bit content of Rs1 is added with the unsigned lower 32-bit
 * content of Rs2. And the result is saturated to the 32-bit unsigned integer range of [0, 2^32-1] and then
 * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag.
 *
 * **Operations**:\n
 * ~~~
 * tmp = Rs1.W[0] + Rs2.W[0];
 * if (tmp > (2^32)-1) {
 *   tmp[31:0] = (2^32)-1;
 *   OV = 1;
 * }
 * Rd = tmp[31:0]; // RV32
 * Rd = SE(tmp[31:0]); // RV64
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UKADDW(unsigned int a, unsigned int b)
{
    /* Register-wide value written to Rd by UKADDW. */
    unsigned long rd;

    __ASM volatile(
        "ukaddw %0, %1, %2"
        : "=r"(rd)           /* %0: Rd       */
        : "r"(a), "r"(b)     /* %1/%2: Rs1, Rs2 */
    );

    return rd;
}
/* ===== Inline Function End for 3.153. UKADDW ===== */

/* ===== Inline Function Start for 3.154. UKCRAS16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief UKCRAS16 (SIMD 16-bit Unsigned Saturating Cross Addition & Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UKCRAS16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do one 16-bit unsigned integer element saturating addition and one 16-bit unsigned
 * integer element saturating subtraction in a 32-bit chunk simultaneously. Operands are from crossed
 * positions in 32-bit chunks.
 *
 * **Description**:\n
 * This instruction adds the 16-bit unsigned integer element in [31:16] of 32-bit chunks in
 * Rs1 with the 16-bit unsigned integer element in [15:0] of 32-bit chunks in Rs2; at the same time, it
 * subtracts the 16-bit unsigned integer element in [31:16] of 32-bit chunks in Rs2 from the 16-bit
 * unsigned integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the 16-bit
 * unsigned number range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set to 1.
 * The saturated results are written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit
 * chunks in Rd for subtraction.
 *
 * **Operations**:\n
 * ~~~
 * res1 = Rs1.W[x][31:16] + Rs2.W[x][15:0];
 * res2 = Rs1.W[x][15:0] - Rs2.W[x][31:16];
 * if (res1 > (2^16)-1) {
 *   res1 = (2^16)-1;
 *   OV = 1;
 * }
 * if (res2 < 0) {
 *   res2 = 0;
 *   OV = 1;
 * }
 * Rd.W[x][31:16] = res1;
 * Rd.W[x][15:0] = res2;
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UKCRAS16(unsigned long a, unsigned long b)
{
    /* Packed add/subtract halfword results written by UKCRAS16. */
    unsigned long packed;

    __ASM volatile(
        "ukcras16 %0, %1, %2"
        : "=r"(packed)       /* %0: Rd       */
        : "r"(a), "r"(b)     /* %1/%2: Rs1, Rs2 */
    );

    return packed;
}
/* ===== Inline Function End for 3.154. UKCRAS16 ===== */

/* ===== Inline Function Start for 3.155. UKCRSA16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief UKCRSA16 (SIMD 16-bit Unsigned Saturating Cross Subtraction & Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UKCRSA16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do one 16-bit unsigned integer element saturating subtraction and one 16-bit unsigned
 * integer element saturating addition in a 32-bit chunk simultaneously. Operands are from crossed
 * positions in 32-bit chunks.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit unsigned integer element in [15:0] of 32-bit
 * chunks in Rs2 from the 16-bit unsigned integer element in [31:16] of 32-bit chunks in Rs1; at the
 * same time, it adds the 16-bit unsigned integer element in [31:16] of 32-bit chunks in Rs2 with the 16-
 * bit unsigned integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the
 * 16-bit unsigned number range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set
 * to 1. The saturated results are written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of
 * 32-bit chunks in Rd for addition.
 *
 * **Operations**:\n
 * ~~~
 * res1 = Rs1.W[x][31:16] - Rs2.W[x][15:0];
 * res2 = Rs1.W[x][15:0] + Rs2.W[x][31:16];
 * if (res1 < 0) {
 *   res1 = 0;
 *   OV = 1;
 * }
 * if (res2 > (2^16)-1) {
 *   res2 = (2^16)-1;
 *   OV = 1;
 * }
 * Rd.W[x][31:16] = res1;
 * Rd.W[x][15:0] = res2;
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UKCRSA16(unsigned long a, unsigned long b)
{
    /* Packed subtract/add halfword results written by UKCRSA16. */
    unsigned long packed;

    __ASM volatile(
        "ukcrsa16 %0, %1, %2"
        : "=r"(packed)       /* %0: Rd       */
        : "r"(a), "r"(b)     /* %1/%2: Rs1, Rs2 */
    );

    return packed;
}
/* ===== Inline Function End for 3.155. UKCRSA16 ===== */

/* ===== Inline Function Start for 3.156. UKMAR64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
 * \brief UKMAR64 (Unsigned Multiply and Saturating Add to 64-Bit Data)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * UKMAR64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the 32-bit unsigned elements in two registers and add the 64-bit multiplication
 * results to the 64-bit unsigned data of a pair of registers (RV32) or a register (RV64). The result is
 * saturated to the U64 range and written back to the pair of registers (RV32) or the register (RV64).
 *
 * **RV32 Description**:\n
 * This instruction multiplies the 32-bit unsigned data of Rs1 with that of Rs2. It
 * adds the 64-bit multiplication result to the 64-bit unsigned data of an even/odd pair of registers
 * specified by Rd(4,1) with unlimited precision. If the 64-bit addition result is beyond the U64 number
 * range (0 <= U64 <= 2^64-1), it is saturated to the range and the OV bit is set to 1. The saturated result is
 * written back to the even/odd pair of registers specified by Rd(4,1).
 * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
 * of the pair contains the low 32-bit of the result.
 *
 * **RV64 Description**:\n
 * This instruction multiplies the 32-bit unsigned elements of Rs1 with that of Rs2.
 * It adds the 64-bit multiplication results to the 64-bit unsigned data in Rd with unlimited precision. If
 * the 64-bit addition result is beyond the U64 number range (0 <= U64 <= 2^64-1), it is saturated to the
 * range and the OV bit is set to 1. The saturated result is written back to Rd.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * result = R[t_H].R[t_L] + (Rs1 u* Rs2);
 * if (result > (2^64)-1) {
 *   result = (2^64)-1; OV = 1;
 * }
 * R[t_H].R[t_L] = result;
 * * RV64:
 * // `result` has unlimited precision
 * result = Rd + (Rs1.W[0] u* Rs2.W[0]) + (Rs1.W[1] u* Rs2.W[1]);
 * if (result > (2^64)-1) {
 *   result = (2^64)-1; OV = 1;
 * }
 * Rd = result;
 * ~~~
 *
 * \param [in]  t    unsigned long long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_UKMAR64(unsigned long long t, unsigned long a, unsigned long b)
{
    /* Accumulator: seeded with `t`, then read and updated in place ("+r"). */
    unsigned long long acc = t;

    __ASM volatile(
        "ukmar64 %0, %1, %2"
        : "+r"(acc)          /* %0: Rd (input and output) */
        : "r"(a), "r"(b)     /* %1/%2: Rs1, Rs2           */
    );

    return acc;
}
/* ===== Inline Function End for 3.156. UKMAR64 ===== */

/* ===== Inline Function Start for 3.157. UKMSR64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
 * \brief UKMSR64 (Unsigned Multiply and Saturating Subtract from 64-Bit Data)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * UKMSR64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the 32-bit unsigned elements in two registers and subtract the 64-bit
 * multiplication results from the 64-bit unsigned data of a pair of registers (RV32) or a register (RV64).
 * The result is saturated to the U64 range and written back to the pair of registers (RV32) or a register
 * (RV64).
 *
 * **RV32 Description**:\n
 * This instruction multiplies the 32-bit unsigned data of Rs1 with that of Rs2. It
 * subtracts the 64-bit multiplication result from the 64-bit unsigned data of an even/odd pair of
 * registers specified by Rd(4,1) with unlimited precision. If the 64-bit subtraction result is beyond the
 * U64 number range (0 <= U64 <= 2^64-1), it is saturated to the range and the OV bit is set to 1. The
 * saturated result is written back to the even/odd pair of registers specified by Rd(4,1).
 * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
 * of the pair contains the low 32-bit of the result.
 *
 * **RV64 Description**:\n
 * This instruction multiplies the 32-bit unsigned elements of Rs1 with that of Rs2.
 * It subtracts the 64-bit multiplication results from the 64-bit unsigned data of Rd with unlimited
 * precision. If the 64-bit subtraction result is beyond the U64 number range (0 <= U64 <= 2^64-1), it is
 * saturated to the range and the OV bit is set to 1. The saturated result is written back to Rd.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * result = R[t_H].R[t_L] - (Rs1 u* Rs2);
 * if (result < 0) {
 *   result = 0; OV = 1;
 * }
 * R[t_H].R[t_L] = result;
 * * RV64:
 * // `result` has unlimited precision
 * result = Rd - (Rs1.W[0] u* Rs2.W[0]) - (Rs1.W[1] u* Rs2.W[1]);
 * if (result < 0) {
 *   result = 0; OV = 1;
 * }
 * Rd = result;
 * ~~~
 *
 * \param [in]  t    unsigned long long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_UKMSR64(unsigned long long t, unsigned long a, unsigned long b)
{
    /* Accumulator: seeded with `t`, then read and updated in place ("+r"). */
    unsigned long long acc = t;

    __ASM volatile(
        "ukmsr64 %0, %1, %2"
        : "+r"(acc)          /* %0: Rd (input and output) */
        : "r"(a), "r"(b)     /* %1/%2: Rs1, Rs2           */
    );

    return acc;
}
/* ===== Inline Function End for 3.157. UKMSR64 ===== */

/* ===== Inline Function Start for 3.158. UKSTAS16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief UKSTAS16 (SIMD 16-bit Unsigned Saturating Straight Addition & Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UKSTAS16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do one 16-bit unsigned integer element saturating addition and one 16-bit unsigned
 * integer element saturating subtraction in a 32-bit chunk simultaneously. Operands are from
 * corresponding positions in 32-bit chunks.
 *
 * **Description**:\n
 * This instruction adds the 16-bit unsigned integer element in [31:16] of 32-bit chunks in
 * Rs1 with the 16-bit unsigned integer element in [31:16] of 32-bit chunks in Rs2; at the same time, it
 * subtracts the 16-bit unsigned integer element in [15:0] of 32-bit chunks in Rs2 from the 16-bit
 * unsigned integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the 16-bit
 * unsigned number range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set to 1.
 * The saturated results are written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit
 * chunks in Rd for subtraction.
 *
 * **Operations**:\n
 * ~~~
 * res1 = Rs1.W[x][31:16] + Rs2.W[x][31:16];
 * res2 = Rs1.W[x][15:0] - Rs2.W[x][15:0];
 * if (res1 > (2^16)-1) {
 *   res1 = (2^16)-1;
 *   OV = 1;
 * }
 * if (res2 < 0) {
 *   res2 = 0;
 *   OV = 1;
 * }
 * Rd.W[x][31:16] = res1;
 * Rd.W[x][15:0] = res2;
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UKSTAS16(unsigned long a, unsigned long b)
{
    /* Packed add/subtract halfword results written by UKSTAS16. */
    unsigned long packed;

    __ASM volatile(
        "ukstas16 %0, %1, %2"
        : "=r"(packed)       /* %0: Rd       */
        : "r"(a), "r"(b)     /* %1/%2: Rs1, Rs2 */
    );

    return packed;
}
/* ===== Inline Function End for 3.158. UKSTAS16 ===== */

/* ===== Inline Function Start for 3.159. UKSTSA16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief UKSTSA16 (SIMD 16-bit Unsigned Saturating Straight Subtraction & Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UKSTSA16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do one 16-bit unsigned integer element saturating subtraction and one 16-bit unsigned
 * integer element saturating addition in a 32-bit chunk simultaneously. Operands are from
 * corresponding positions in 32-bit chunks.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit unsigned integer element in [31:16] of 32-bit
 * chunks in Rs2 from the 16-bit unsigned integer element in [31:16] of 32-bit chunks in Rs1; at the
 * same time, it adds the 16-bit unsigned integer element in [15:0] of 32-bit chunks in Rs2 with the 16-
 * bit unsigned integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the
 * 16-bit unsigned number range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set
 * to 1. The saturated results are written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of
 * 32-bit chunks in Rd for addition.
 *
 * **Operations**:\n
 * ~~~
 * res1 = Rs1.W[x][31:16] - Rs2.W[x][31:16];
 * res2 = Rs1.W[x][15:0] + Rs2.W[x][15:0];
 * if (res1 < 0) {
 *   res1 = 0;
 *   OV = 1;
 * }
 * if (res2 > (2^16)-1) {
 *   res2 = (2^16)-1;
 *   OV = 1;
 * }
 * Rd.W[x][31:16] = res1;
 * Rd.W[x][15:0] = res2;
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UKSTSA16(unsigned long a, unsigned long b)
{
    /* Packed subtract/add halfword results written by UKSTSA16. */
    unsigned long packed;

    __ASM volatile(
        "ukstsa16 %0, %1, %2"
        : "=r"(packed)       /* %0: Rd       */
        : "r"(a), "r"(b)     /* %1/%2: Rs1, Rs2 */
    );

    return packed;
}
/* ===== Inline Function End for 3.159. UKSTSA16 ===== */

/* ===== Inline Function Start for 3.160. UKSUB8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
 * \brief UKSUB8 (SIMD 8-bit Unsigned Saturating Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UKSUB8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit unsigned integer elements saturating subtractions simultaneously.
 *
 * **Description**:\n
 * This instruction subtracts the 8-bit unsigned integer elements in Rs2 from the 8-bit
 * unsigned integer elements in Rs1. If any of the results are beyond the 8-bit unsigned number range
 * (0 <= RES <= 2^8-1), they are saturated to the range and the OV bit is set to 1. The saturated results are
 * written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rs1.B[x] - Rs2.B[x];
 * if (res[x] < 0) {
 *   res[x] = 0;
 *   OV = 1;
 * }
 * Rd.B[x] = res[x];
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UKSUB8(unsigned long a, unsigned long b)
{
    /* Packed 8-bit saturated differences written by UKSUB8. */
    unsigned long diff;

    __ASM volatile(
        "uksub8 %0, %1, %2"
        : "=r"(diff)         /* %0: Rd       */
        : "r"(a), "r"(b)     /* %1/%2: Rs1, Rs2 */
    );

    return diff;
}
/* ===== Inline Function End for 3.160. UKSUB8 ===== */

/* ===== Inline Function Start for 3.161. UKSUB16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief UKSUB16 (SIMD 16-bit Unsigned Saturating Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UKSUB16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit unsigned integer elements saturating subtractions simultaneously.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit unsigned integer elements in Rs2 from the 16-bit
 * unsigned integer elements in Rs1. If any of the results are beyond the 16-bit unsigned number
 * range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set to 1. The saturated
 * results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rs1.H[x] - Rs2.H[x];
 * if (res[x] < 0) {
 *   res[x] = 0;
 *   OV = 1;
 * }
 * Rd.H[x] = res[x];
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UKSUB16(unsigned long a, unsigned long b)
{
    /* Packed 16-bit saturated differences written by UKSUB16. */
    unsigned long diff;

    __ASM volatile(
        "uksub16 %0, %1, %2"
        : "=r"(diff)         /* %0: Rd       */
        : "r"(a), "r"(b)     /* %1/%2: Rs1, Rs2 */
    );

    return diff;
}
/* ===== Inline Function End for 3.161. UKSUB16 ===== */

/* ===== Inline Function Start for 3.162. UKSUB64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
 * \brief UKSUB64 (64-bit Unsigned Saturating Subtraction)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * UKSUB64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Perform a 64-bit unsigned integer subtraction. The result is saturated to the U64 range.
 *
 * **RV32 Description**:\n
 * This instruction subtracts the 64-bit unsigned integer of an even/odd pair of
 * registers specified by Rs2(4,1) from the 64-bit unsigned integer of an even/odd pair of registers
 * specified by Rs1(4,1). If the 64-bit result is beyond the U64 number range (0 <= U64 <= 2^64-1), it is
 * saturated to the range and the OV bit is set to 1. The saturated result is then written to an even/odd
 * pair of registers specified by Rd(4,1).
 * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
 * register of the pair contains the low 32-bit of the operand.
 *
 * **RV64 Description**:\n
 * This instruction subtracts the 64-bit unsigned integer of Rs2 from the 64-bit
 * unsigned integer of Rs1. If the 64-bit result is beyond the U64 number range (0 <=
 * U64 <= 2^64-1), it is saturated to the range and the OV bit is set to 1. The saturated result is then written
 * to Rd.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
 * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
 * result = R[a_H].R[a_L] - R[b_H].R[b_L];
 * if (result < 0) {
 *   result = 0; OV = 1;
 * }
 * R[t_H].R[t_L] = result;
 * * RV64
 * result = Rs1 - Rs2;
 * if (result < 0) {
 *   result = 0; OV = 1;
 * }
 * Rd = result;
 * ~~~
 *
 * \param [in]  a    unsigned long long type of value stored in a
 * \param [in]  b    unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_UKSUB64(unsigned long long a, unsigned long long b)
{
    unsigned long long diff;

    /* 64-bit unsigned a - b; a result below 0 is clamped to 0 and OV is set. */
    __ASM volatile(
        "uksub64 %0, %1, %2"
        : "=r"(diff)
        : "r"(a), "r"(b));

    return diff;
}
/* ===== Inline Function End for 3.162. UKSUB64 ===== */

/* ===== Inline Function Start for 3.163. UKSUBH ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
 * \brief UKSUBH (Unsigned Subtraction with U16 Saturation)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * UKSUBH Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Subtract the unsigned lower 32-bit content of two registers with U16 saturation.
 *
 * **Description**:\n
 * The unsigned lower 32-bit content of Rs2 is subtracted from the unsigned lower 32-bit
 * content of Rs1. And the result is saturated to the 16-bit unsigned integer range of [0, 2^16-1] and then
 * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag.
 *
 * **Operations**:\n
 * ~~~
 * tmp = Rs1.W[0] - Rs2.W[0];
 * if (tmp > (2^16)-1) {
 *   tmp = (2^16)-1;
 *   OV = 1;
 * }
 * else if (tmp < 0) {
 *   tmp = 0;
 *   OV = 1;
 * }
 * Rd = SE(tmp[15:0]);
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UKSUBH(unsigned int a, unsigned int b)
{
    unsigned long rd;

    /* 32-bit a - b, saturated to the U16 range [0, 2^16-1]; OV is set when
     * saturation happens. */
    __ASM volatile(
        "uksubh %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.163. UKSUBH ===== */

/* ===== Inline Function Start for 3.164. UKSUBW ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
 * \brief UKSUBW (Unsigned Subtraction with U32 Saturation)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * UKSUBW Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Subtract the unsigned lower 32-bit content of two registers with unsigned 32-bit
 * saturation.
 *
 * **Description**:\n
 * The unsigned lower 32-bit content of Rs2 is subtracted from the unsigned lower 32-bit
 * content of Rs1. And the result is saturated to the 32-bit unsigned integer range of [0, 2^32-1] and then
 * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag.
 *
 * **Operations**:\n
 * ~~~
 * tmp = Rs1.W[0] - Rs2.W[0];
 * if (tmp < 0) {
 *   tmp[31:0] = 0;
 *   OV = 1;
 * }
 * Rd = tmp[31:0]; // RV32
 * Rd = SE(tmp[31:0]); // RV64
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UKSUBW(unsigned int a, unsigned int b)
{
    unsigned long rd;

    /* 32-bit a - b with unsigned 32-bit saturation: a result below 0 becomes
     * 0 and OV is set. */
    __ASM volatile(
        "uksubw %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.164. UKSUBW ===== */

/* ===== Inline Function Start for 3.165. UMAR64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
 * \brief UMAR64 (Unsigned Multiply and Add to 64-Bit Data)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * UMAR64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the 32-bit unsigned elements in two registers and add the 64-bit multiplication
 * results to the 64-bit unsigned data of a pair of registers (RV32) or a register (RV64). The result is
 * written back to the pair of registers (RV32) or a register (RV64).
 *
 * **RV32 Description**:\n
 * This instruction multiplies the 32-bit unsigned data of Rs1 with that of Rs2. It
 * adds the 64-bit multiplication result to the 64-bit unsigned data of an even/odd pair of registers
 * specified by Rd(4,1). The addition result is written back to the even/odd pair of registers specified by
 * Rd(4,1).
 * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
 * of the pair contains the low 32-bit of the result.
 *
 * **RV64 Description**:\n
 * This instruction multiplies the 32-bit unsigned elements of Rs1 with that of Rs2.
 * It adds the 64-bit multiplication results to the 64-bit unsigned data of Rd. The addition result is
 * written back to Rd.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * R[t_H].R[t_L] = R[t_H].R[t_L] + (Rs1 * Rs2);
 * * RV64:
 * Rd = Rd + (Rs1.W[0] u* Rs2.W[0]) + (Rs1.W[1] u* Rs2.W[1]);
 * ~~~
 *
 * \param [in]  t    unsigned long long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_UMAR64(unsigned long long t, unsigned long a, unsigned long b)
{
    /* t is the 64-bit accumulator: it is read, the unsigned 32-bit product(s)
     * of a and b are added, and the sum is written back, so it is bound as a
     * read-write ("+r") operand. */
    __ASM volatile(
        "umar64 %0, %1, %2"
        : "+r"(t)
        : "r"(a), "r"(b));

    return t;
}
/* ===== Inline Function End for 3.165. UMAR64 ===== */

/* ===== Inline Function Start for 3.166. UMAQA ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_8B_MULT_32B_ADD
 * \brief UMAQA (Unsigned Multiply Four Bytes with 32-bit Adds)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * UMAQA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do four unsigned 8-bit multiplications from 32-bit chunks of two registers; and then adds
 * the four 16-bit results and the content of corresponding 32-bit chunks of a third register together.
 *
 * **Description**:\n
 * This instruction multiplies the four unsigned 8-bit elements of 32-bit chunks of Rs1 with the four
 * unsigned 8-bit elements of 32-bit chunks of Rs2 and then adds the four results together with the
 * unsigned content of the corresponding 32-bit chunks of Rd. The final results are written back to the
 * corresponding 32-bit chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rd.W[x] + (Rs1.W[x].B[3] u* Rs2.W[x].B[3]) +
 *          (Rs1.W[x].B[2] u* Rs2.W[x].B[2]) + (Rs1.W[x].B[1] u* Rs2.W[x].B[1]) +
 *          (Rs1.W[x].B[0] u* Rs2.W[x].B[0]);
 * Rd.W[x] = res[x];
 * for RV32: x=0,
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    unsigned long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UMAQA(unsigned long t, unsigned long a, unsigned long b)
{
    /* For each 32-bit chunk: the four unsigned byte products of a and b are
     * summed into the matching chunk of t. t is both source and destination,
     * hence the read-write ("+r") operand. */
    __ASM volatile(
        "umaqa %0, %1, %2"
        : "+r"(t)
        : "r"(a), "r"(b));

    return t;
}
/* ===== Inline Function End for 3.166. UMAQA ===== */

/* ===== Inline Function Start for 3.167. UMAX8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
 * \brief UMAX8 (SIMD 8-bit Unsigned Maximum)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UMAX8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit unsigned integer elements finding maximum operations simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 8-bit unsigned integer elements in Rs1 with the 8-bit
 * unsigned integer elements in Rs2 and selects the greater one of each pair. The
 * selected results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.B[x] = (Rs1.B[x] >u Rs2.B[x])? Rs1.B[x] : Rs2.B[x];
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UMAX8(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* Per-byte unsigned maximum of a and b. */
    __ASM volatile(
        "umax8 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.167. UMAX8 ===== */

/* ===== Inline Function Start for 3.168. UMAX16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
 * \brief UMAX16 (SIMD 16-bit Unsigned Maximum)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UMAX16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit unsigned integer elements finding maximum operations simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 16-bit unsigned integer elements in Rs1 with the 16-bit
 * unsigned integer elements in Rs2 and selects the greater one of each pair. The
 * selected results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = (Rs1.H[x] >u Rs2.H[x])? Rs1.H[x] : Rs2.H[x];
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UMAX16(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* Per-halfword unsigned maximum of a and b. */
    __ASM volatile(
        "umax16 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.168. UMAX16 ===== */

/* ===== Inline Function Start for 3.169. UMIN8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
 * \brief UMIN8 (SIMD 8-bit Unsigned Minimum)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UMIN8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit unsigned integer elements finding minimum operations simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 8-bit unsigned integer elements in Rs1 with the 8-bit
 * unsigned integer elements in Rs2 and selects the smaller one of each pair. The
 * selected results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.B[x] = (Rs1.B[x] <u Rs2.B[x])? Rs1.B[x] : Rs2.B[x];
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UMIN8(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* Per-byte unsigned minimum of a and b. */
    __ASM volatile(
        "umin8 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.169. UMIN8 ===== */

/* ===== Inline Function Start for 3.170. UMIN16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
 * \brief UMIN16 (SIMD 16-bit Unsigned Minimum)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UMIN16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit unsigned integer elements finding minimum operations simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 16-bit unsigned integer elements in Rs1 with the 16-bit
 * unsigned integer elements in Rs2 and selects the smaller one of each pair. The
 * selected results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = (Rs1.H[x] <u Rs2.H[x])? Rs1.H[x] : Rs2.H[x];
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UMIN16(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* Per-halfword unsigned minimum of a and b. */
    __ASM volatile(
        "umin16 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.170. UMIN16 ===== */

/* ===== Inline Function Start for 3.171. UMSR64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
 * \brief UMSR64 (Unsigned Multiply and Subtract from 64-Bit Data)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * UMSR64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the 32-bit unsigned elements in two registers and subtract the 64-bit
 * multiplication results from the 64-bit unsigned data of a pair of registers (RV32) or a register (RV64).
 * The result is written back to the pair of registers (RV32) or a register (RV64).
 *
 * **RV32 Description**:\n
 * This instruction multiplies the 32-bit unsigned data of Rs1 with that of Rs2. It
 * subtracts the 64-bit multiplication result from the 64-bit unsigned data of an even/odd pair of
 * registers specified by Rd(4,1). The subtraction result is written back to the even/odd pair of registers
 * specified by Rd(4,1).
 * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
 * of the pair contains the low 32-bit of the result.
 *
 * **RV64 Description**:\n
 * This instruction multiplies the 32-bit unsigned elements of Rs1 with that of Rs2.
 * It subtracts the 64-bit multiplication results from the 64-bit unsigned data of Rd. The subtraction
 * result is written back to Rd.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * R[t_H].R[t_L] = R[t_H].R[t_L] - (Rs1 * Rs2);
 * * RV64:
 * Rd = Rd - (Rs1.W[0] u* Rs2.W[0]) - (Rs1.W[1] u* Rs2.W[1]);
 * ~~~
 *
 * \param [in]  t    unsigned long long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_UMSR64(unsigned long long t, unsigned long a, unsigned long b)
{
    /* t is the 64-bit accumulator: it is read, the unsigned 32-bit product(s)
     * of a and b are subtracted, and the difference is written back, so it is
     * bound as a read-write ("+r") operand. */
    __ASM volatile(
        "umsr64 %0, %1, %2"
        : "+r"(t)
        : "r"(a), "r"(b));

    return t;
}
/* ===== Inline Function End for 3.171. UMSR64 ===== */

/* ===== Inline Function Start for 3.172.1. UMUL8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY
 * \brief UMUL8 (SIMD Unsigned 8-bit Multiply)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UMUL8 Rd, Rs1, Rs2
 * UMULX8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do unsigned 8-bit multiplications and generate four 16-bit results simultaneously.
 *
 * **RV32 Description**:\n
 * For the `UMUL8` instruction, multiply the unsigned 8-bit data elements of Rs1
 * with the corresponding unsigned 8-bit data elements of Rs2.
 * For the `UMULX8` instruction, multiply the first and second unsigned 8-bit data elements of Rs1
 * with the second and first unsigned 8-bit data elements of Rs2. At the same time, multiply the third
 * and fourth unsigned 8-bit data elements of Rs1 with the fourth and third unsigned 8-bit data
 * elements of Rs2.
 * The four 16-bit results are then written into an even/odd pair of registers specified by Rd(4,1).
 * Rd(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the two 16-bit results calculated from the top part of
 * Rs1 and the even `2d` register of the pair contains the two 16-bit results calculated from the bottom
 * part of Rs1.
 *
 * **RV64 Description**:\n
 * For the `UMUL8` instruction, multiply the unsigned 8-bit data elements of Rs1
 * with the corresponding unsigned 8-bit data elements of Rs2.
 * For the `UMULX8` instruction, multiply the first and second unsigned 8-bit data elements of Rs1
 * with the second and first unsigned 8-bit data elements of Rs2. At the same time, multiply the third
 * and fourth unsigned 8-bit data elements of Rs1 with the fourth and third unsigned 8-bit data
 * elements of Rs2.
 * The four 16-bit results are then written into Rd. The Rd.W[1] contains the two 16-bit results
 * calculated from the top part of Rs1 and the Rd.W[0] contains the two 16-bit results calculated from
 * the bottom part of Rs1.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * if (is `UMUL8`) {
 *   op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
 *   op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
 * } else if (is `UMULX8`) {
 *   op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
 *   op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
 * }
 * rest[x/2] = op1t[x/2] u* op2t[x/2];
 * resb[x/2] = op1b[x/2] u* op2b[x/2];
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * R[t_H].H[1] = rest[1]; R[t_H].H[0] = resb[1];
 * R[t_L].H[1] = rest[0]; R[t_L].H[0] = resb[0];
 * x = 0 and 2
 * * RV64:
 * if (is `UMUL8`) {
 *     op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
 *     op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
 * } else if (is `UMULX8`) {
 *     op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
 *     op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
 * }
 * rest[x/2] = op1t[x/2] u* op2t[x/2];
 * resb[x/2] = op1b[x/2] u* op2b[x/2];
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * Rd.W[1].H[1] = rest[1]; Rd.W[1].H[0] = resb[1];
 * Rd.W[0].H[1] = rest[0]; Rd.W[0].H[0] = resb[0];
 * x = 0 and 2
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_UMUL8(unsigned int a, unsigned int b)
{
    unsigned long long prod;

    /* Multiplies corresponding unsigned bytes of a and b; the four 16-bit
     * products are returned packed in the 64-bit result. */
    __ASM volatile(
        "umul8 %0, %1, %2"
        : "=r"(prod)
        : "r"(a), "r"(b));

    return prod;
}
/* ===== Inline Function End for 3.172.1. UMUL8 ===== */

/* ===== Inline Function Start for 3.172.2. UMULX8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY
 * \brief UMULX8 (SIMD Unsigned Crossed 8-bit Multiply)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UMUL8 Rd, Rs1, Rs2
 * UMULX8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do unsigned 8-bit multiplications and generate four 16-bit results simultaneously.
 *
 * **RV32 Description**:\n
 * For the `UMUL8` instruction, multiply the unsigned 8-bit data elements of Rs1
 * with the corresponding unsigned 8-bit data elements of Rs2.
 * For the `UMULX8` instruction, multiply the first and second unsigned 8-bit data elements of Rs1
 * with the second and first unsigned 8-bit data elements of Rs2. At the same time, multiply the third
 * and fourth unsigned 8-bit data elements of Rs1 with the fourth and third unsigned 8-bit data
 * elements of Rs2.
 * The four 16-bit results are then written into an even/odd pair of registers specified by Rd(4,1).
 * Rd(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the two 16-bit results calculated from the top part of
 * Rs1 and the even `2d` register of the pair contains the two 16-bit results calculated from the bottom
 * part of Rs1.
 *
 * **RV64 Description**:\n
 * For the `UMUL8` instruction, multiply the unsigned 8-bit data elements of Rs1
 * with the corresponding unsigned 8-bit data elements of Rs2.
 * For the `UMULX8` instruction, multiply the first and second unsigned 8-bit data elements of Rs1
 * with the second and first unsigned 8-bit data elements of Rs2. At the same time, multiply the third
 * and fourth unsigned 8-bit data elements of Rs1 with the fourth and third unsigned 8-bit data
 * elements of Rs2.
 * The four 16-bit results are then written into Rd. The Rd.W[1] contains the two 16-bit results
 * calculated from the top part of Rs1 and the Rd.W[0] contains the two 16-bit results calculated from
 * the bottom part of Rs1.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * if (is `UMUL8`) {
 *   op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
 *   op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
 * } else if (is `UMULX8`) {
 *   op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
 *   op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
 * }
 * rest[x/2] = op1t[x/2] u* op2t[x/2];
 * resb[x/2] = op1b[x/2] u* op2b[x/2];
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * R[t_H].H[1] = rest[1]; R[t_H].H[0] = resb[1];
 * R[t_L].H[1] = rest[0]; R[t_L].H[0] = resb[0];
 * x = 0 and 2
 * * RV64:
 * if (is `UMUL8`) {
 *     op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
 *     op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
 * } else if (is `UMULX8`) {
 *     op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
 *     op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
 * }
 * rest[x/2] = op1t[x/2] u* op2t[x/2];
 * resb[x/2] = op1b[x/2] u* op2b[x/2];
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * Rd.W[1].H[1] = rest[1]; Rd.W[1].H[0] = resb[1];
 * Rd.W[0].H[1] = rest[0]; Rd.W[0].H[0] = resb[0];
 * x = 0 and 2
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_UMULX8(unsigned int a, unsigned int b)
{
    unsigned long long prod;

    /* Crossed variant: within each byte pair, the upper byte of a is
     * multiplied by the lower byte of b and vice versa; the four 16-bit
     * products are returned packed in the 64-bit result. */
    __ASM volatile(
        "umulx8 %0, %1, %2"
        : "=r"(prod)
        : "r"(a), "r"(b));

    return prod;
}
/* ===== Inline Function End for 3.172.2. UMULX8 ===== */

/* ===== Inline Function Start for 3.173.1. UMUL16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY
 * \brief UMUL16 (SIMD Unsigned 16-bit Multiply)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UMUL16 Rd, Rs1, Rs2
 * UMULX16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do unsigned 16-bit multiplications and generate two 32-bit results simultaneously.
 *
 * **RV32 Description**:\n
 * For the `UMUL16` instruction, multiply the top 16-bit U16 content of Rs1 with
 * the top 16-bit U16 content of Rs2. At the same time, multiply the bottom 16-bit U16 content of Rs1
 * with the bottom 16-bit U16 content of Rs2.
 * For the `UMULX16` instruction, multiply the top 16-bit U16 content of Rs1 with the bottom 16-bit
 * U16 content of Rs2. At the same time, multiply the bottom 16-bit U16 content of Rs1 with the top
 * 16-bit U16 content of Rs2.
 * The two U32 results are then written into an even/odd pair of registers specified by Rd(4,1). Rd(4,1),
 * i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair includes
 * register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the 32-bit result calculated from the top part of Rs1 and
 * the even `2d` register of the pair contains the 32-bit result calculated from the bottom part of Rs1.
 *
 * **RV64 Description**:\n
 * For the `UMUL16` instruction, multiply the top 16-bit U16 content of the lower
 * 32-bit word in Rs1 with the top 16-bit U16 content of the lower 32-bit word in Rs2. At the same time,
 * multiply the bottom 16-bit U16 content of the lower 32-bit word in Rs1 with the bottom 16-bit U16
 * content of the lower 32-bit word in Rs2.
 * For the `UMULX16` instruction, multiply the top 16-bit U16 content of the lower 32-bit word in Rs1
 * with the bottom 16-bit U16 content of the lower 32-bit word in Rs2. At the same time, multiply the
 * bottom 16-bit U16 content of the lower 32-bit word in Rs1 with the top 16-bit U16 content of the
 * lower 32-bit word in Rs2.
 * The two 32-bit U32 results are then written into Rd. The result calculated from the top 16-bit of the
 * lower 32-bit word in Rs1 is written to Rd.W[1]. And the result calculated from the bottom 16-bit of
 * the lower 32-bit word in Rs1 is written to Rd.W[0].
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * if (is `UMUL16`) {
 *   op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
 *   op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
 * } else if (is `UMULX16`) {
 *   op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
 *   op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
 * }
 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
 *   res = aop u* bop;
 * }
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * R[t_H] = rest;
 * R[t_L] = resb;
 * * RV64:
 * if (is `UMUL16`) {
 *   op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
 *   op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
 * } else if (is `UMULX16`) {
 *   op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
 *   op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
 * }
 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
 *   res = aop u* bop;
 * }
 * Rd.W[1] = rest;
 * Rd.W[0] = resb;
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_UMUL16(unsigned int a, unsigned int b)
{
    unsigned long long prod;

    /* top(a) u* top(b) and bottom(a) u* bottom(b); the two 32-bit products
     * are returned as the high and low words of the 64-bit result. */
    __ASM volatile(
        "umul16 %0, %1, %2"
        : "=r"(prod)
        : "r"(a), "r"(b));

    return prod;
}
/* ===== Inline Function End for 3.173.1. UMUL16 ===== */

/* ===== Inline Function Start for 3.173.2. UMULX16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY
 * \brief UMULX16 (SIMD Unsigned Crossed 16-bit Multiply)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UMUL16 Rd, Rs1, Rs2
 * UMULX16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do unsigned 16-bit multiplications and generate two 32-bit results simultaneously.
 *
 * **RV32 Description**:\n
 * For the `UMUL16` instruction, multiply the top 16-bit U16 content of Rs1 with
 * the top 16-bit U16 content of Rs2. At the same time, multiply the bottom 16-bit U16 content of Rs1
 * with the bottom 16-bit U16 content of Rs2.
 * For the `UMULX16` instruction, multiply the top 16-bit U16 content of Rs1 with the bottom 16-bit
 * U16 content of Rs2. At the same time, multiply the bottom 16-bit U16 content of Rs1 with the top
 * 16-bit U16 content of Rs2.
 * The two U32 results are then written into an even/odd pair of registers specified by Rd(4,1). Rd(4,1),
 * i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair includes
 * register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the 32-bit result calculated from the top part of Rs1 and
 * the even `2d` register of the pair contains the 32-bit result calculated from the bottom part of Rs1.
 *
 * **RV64 Description**:\n
 * For the `UMUL16` instruction, multiply the top 16-bit U16 content of the lower
 * 32-bit word in Rs1 with the top 16-bit U16 content of the lower 32-bit word in Rs2. At the same time,
 * multiply the bottom 16-bit U16 content of the lower 32-bit word in Rs1 with the bottom 16-bit U16
 * content of the lower 32-bit word in Rs2.
 * For the `UMULX16` instruction, multiply the top 16-bit U16 content of the lower 32-bit word in Rs1
 * with the bottom 16-bit U16 content of the lower 32-bit word in Rs2. At the same time, multiply the
 * bottom 16-bit U16 content of the lower 32-bit word in Rs1 with the top 16-bit U16 content of the
 * lower 32-bit word in Rs2.
 * The two 32-bit U32 results are then written into Rd. The result calculated from the top 16-bit of the
 * lower 32-bit word in Rs1 is written to Rd.W[1]. And the result calculated from the bottom 16-bit of
 * the lower 32-bit word in Rs1 is written to Rd.W[0].
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * if (is `UMUL16`) {
 *   op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
 *   op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
 * } else if (is `UMULX16`) {
 *   op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
 *   op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
 * }
 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
 *   res = aop u* bop;
 * }
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * R[t_H] = rest;
 * R[t_L] = resb;
 * * RV64:
 * if (is `UMUL16`) {
 *   op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
 *   op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
 * } else if (is `UMULX16`) {
 *   op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
 *   op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
 * }
 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
 *   res = aop u* bop;
 * }
 * Rd.W[1] = rest;
 * Rd.W[0] = resb;
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_UMULX16(unsigned int a, unsigned int b)
{
    unsigned long long prod;

    /* Crossed variant: top(a) u* bottom(b) and bottom(a) u* top(b); the two
     * 32-bit products are returned as the high and low words of the result. */
    __ASM volatile(
        "umulx16 %0, %1, %2"
        : "=r"(prod)
        : "r"(a), "r"(b));

    return prod;
}
/* ===== Inline Function End for 3.173.2. UMULX16 ===== */

/* ===== Inline Function Start for 3.174. URADD8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
 * \brief URADD8 (SIMD 8-bit Unsigned Halving Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * URADD8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit unsigned integer element additions simultaneously. The results are halved to
 * avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the 8-bit unsigned integer elements in Rs1 with the 8-bit
 * unsigned integer elements in Rs2. The results are first logically right-shifted by 1 bit and then
 * written to Rd.
 *
 * **Examples**:\n
 * ~~~
 * * Ra = 0x7F, Rb = 0x7F, Rt = 0x7F
 * * Ra = 0x80, Rb = 0x80, Rt = 0x80
 * * Ra = 0x40, Rb = 0x80, Rt = 0x60
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.B[x] = (Rs1.B[x] + Rs2.B[x]) u>> 1;
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_URADD8(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* Per-byte unsigned (a + b) logically shifted right by 1, i.e. a halving
     * add. */
    __ASM volatile(
        "uradd8 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.174. URADD8 ===== */

/* ===== Inline Function Start for 3.175. URADD16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief URADD16 (SIMD 16-bit Unsigned Halving Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * URADD16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit unsigned integer element additions simultaneously. The results are halved to
 * avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the 16-bit unsigned integer elements in Rs1 with the 16-bit
 * unsigned integer elements in Rs2. The results are first logically right-shifted by 1 bit and then
 * written to Rd.
 *
 * **Examples**:\n
 * ~~~
 * * Ra = 0x7FFF, Rb = 0x7FFF, Rt = 0x7FFF
 * * Ra = 0x8000, Rb = 0x8000, Rt = 0x8000
 * * Ra = 0x4000, Rb = 0x8000, Rt = 0x6000
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = (Rs1.H[x] + Rs2.H[x]) u>> 1;
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_URADD16(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* Per halfword lane: Rd.H[x] = (a.H[x] + b.H[x]) u>> 1; a is Rs1, b is Rs2. */
    __ASM volatile("uradd16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.175. URADD16 ===== */

/* ===== Inline Function Start for 3.176. URADD64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
 * \brief URADD64 (64-bit Unsigned Halving Addition)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * URADD64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Add two 64-bit unsigned integers. The result is halved to avoid overflow or saturation.
 *
 * **RV32 Description**:\n
 * This instruction adds the 64-bit unsigned integer of an even/odd pair of registers
 * specified by Rs1(4,1) with the 64-bit unsigned integer of an even/odd pair of registers specified by
 * Rs2(4,1). The 64-bit addition result is first logically right-shifted by 1 bit and then written to an
 * even/odd pair of registers specified by Rd(4,1).
 * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
 * of the pair contains the low 32-bit of the result.
 *
 * **RV64 Description**:\n
 * This instruction adds the 64-bit unsigned integer in Rs1 with the 64-bit unsigned
 * integer Rs2. The 64-bit addition result is first logically right-shifted by 1 bit and then written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * t_L = CONCAT(Rt(4,1),1'b0); t_H = CONCAT(Rt(4,1),1'b1);
 * a_L = CONCAT(Ra(4,1),1'b0); a_H = CONCAT(Ra(4,1),1'b1);
 * b_L = CONCAT(Rb(4,1),1'b0); b_H = CONCAT(Rb(4,1),1'b1);
 * R[t_H].R[t_L] = (R[a_H].R[a_L] + R[b_H].R[b_L]) u>> 1;
 * * RV64:
 * Rd = (Rs1 + Rs2) u>> 1;
 * ~~~
 *
 * \param [in]  a    unsigned long long type of value stored in a
 * \param [in]  b    unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_URADD64(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;

    /* 64-bit Rd = (a + b) u>> 1; a is Rs1, b is Rs2. */
    __ASM volatile("uradd64 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.176. URADD64 ===== */

/* ===== Inline Function Start for 3.177. URADDW ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
 * \brief URADDW (32-bit Unsigned Halving Addition)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * URADDW Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Add 32-bit unsigned integers and the results are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the first 32-bit unsigned integer in Rs1 with the first 32-bit
 * unsigned integer in Rs2. The result is first logically right-shifted by 1 bit and then sign-extended and
 * written to Rd.
 *
 * **Examples**:\n
 * ~~~
 * * Ra = 0x7FFFFFFF, Rb = 0x7FFFFFFF, Rt = 0x7FFFFFFF
 * * Ra = 0x80000000, Rb = 0x80000000, Rt = 0x80000000
 * * Ra = 0x40000000, Rb = 0x80000000, Rt = 0x60000000
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * Rd[31:0] = (Rs1[31:0] + Rs2[31:0]) u>> 1;
 * * RV64:
 * resw[31:0] = (Rs1[31:0] + Rs2[31:0]) u>> 1;
 * Rd[63:0] = SE(resw[31:0]);
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_URADDW(unsigned int a, unsigned int b)
{
    unsigned long rd;

    /* Rd = (a[31:0] + b[31:0]) u>> 1; a is Rs1, b is Rs2. */
    __ASM volatile("uraddw %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.177. URADDW ===== */

/* ===== Inline Function Start for 3.178. URCRAS16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief URCRAS16 (SIMD 16-bit Unsigned Halving Cross Addition & Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * URCRAS16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit unsigned integer element addition and 16-bit unsigned integer element
 * subtraction in a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit chunks.
 * The results are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs1
 * with the 16-bit unsigned integer in [15:0] of 32-bit chunks in Rs2, and subtracts the 16-bit unsigned
 * integer in [31:16] of 32-bit chunks in Rs2 from the 16-bit unsigned integer in [15:0] of 32-bit chunks
 * in Rs1. The element results are first logically right-shifted by 1 bit and then written to [31:16] of 32-
 * bit chunks in Rd and [15:0] of 32-bit chunks in Rd.
 *
 * **Examples**:\n
 * ~~~
 * Please see `URADD16` and `URSUB16` instructions.
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:16] = (Rs1.W[x][31:16] + Rs2.W[x][15:0]) u>> 1;
 * Rd.W[x][15:0] = (Rs1.W[x][15:0] - Rs2.W[x][31:16]) u>> 1;
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_URCRAS16(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /*
     * Per 32-bit chunk, with a as Rs1 and b as Rs2:
     *   Rd[31:16] = (a[31:16] + b[15:0])  u>> 1
     *   Rd[15:0]  = (a[15:0]  - b[31:16]) u>> 1
     */
    __ASM volatile("urcras16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.178. URCRAS16 ===== */

/* ===== Inline Function Start for 3.179. URCRSA16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief URCRSA16 (SIMD 16-bit Unsigned Halving Cross Subtraction & Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * URCRSA16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit unsigned integer element subtraction and 16-bit unsigned integer element
 * addition in a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit chunks.
 * The results are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit unsigned integer in [15:0] of 32-bit chunks in Rs2
 * from the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs1, and adds the 16-bit unsigned
 * integer in [15:0] of 32-bit chunks in Rs1 with the 16-bit unsigned integer in [31:16] of 32-bit chunks
 * in Rs2. The two results are first logically right-shifted by 1 bit and then written to [31:16] of 32-bit
 * chunks in Rd and [15:0] of 32-bit chunks in Rd.
 *
 * **Examples**:\n
 * ~~~
 * Please see `URADD16` and `URSUB16` instructions.
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:16] = (Rs1.W[x][31:16] - Rs2.W[x][15:0]) u>> 1;
 * Rd.W[x][15:0] = (Rs1.W[x][15:0] + Rs2.W[x][31:16]) u>> 1;
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_URCRSA16(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /*
     * Per 32-bit chunk, with a as Rs1 and b as Rs2:
     *   Rd[31:16] = (a[31:16] - b[15:0])  u>> 1
     *   Rd[15:0]  = (a[15:0]  + b[31:16]) u>> 1
     */
    __ASM volatile("urcrsa16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.179. URCRSA16 ===== */

/* ===== Inline Function Start for 3.180. URSTAS16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief URSTAS16 (SIMD 16-bit Unsigned Halving Straight Addition & Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * URSTAS16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit unsigned integer element addition and 16-bit unsigned integer element
 * subtraction in a 32-bit chunk simultaneously. Operands are from corresponding positions in 32-bit
 * chunks. The results are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs1
 * with the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs2, and subtracts the 16-bit unsigned
 * integer in [15:0] of 32-bit chunks in Rs2 from the 16-bit unsigned integer in [15:0] of 32-bit chunks
 * in Rs1. The element results are first logically right-shifted by 1 bit and then written to [31:16] of 32-
 * bit chunks in Rd and [15:0] of 32-bit chunks in Rd.
 *
 * **Examples**:\n
 * ~~~
 * Please see `URADD16` and `URSUB16` instructions.
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:16] = (Rs1.W[x][31:16] + Rs2.W[x][31:16]) u>> 1;
 * Rd.W[x][15:0] = (Rs1.W[x][15:0] - Rs2.W[x][15:0]) u>> 1;
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_URSTAS16(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /*
     * Per 32-bit chunk, with a as Rs1 and b as Rs2:
     *   Rd[31:16] = (a[31:16] + b[31:16]) u>> 1
     *   Rd[15:0]  = (a[15:0]  - b[15:0])  u>> 1
     */
    __ASM volatile("urstas16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.180. URSTAS16 ===== */

/* ===== Inline Function Start for 3.181. URSTSA16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief URSTSA16 (SIMD 16-bit Unsigned Halving Straight Subtraction & Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * URSTSA16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit unsigned integer element subtraction and 16-bit unsigned integer element
 * addition in a 32-bit chunk simultaneously. Operands are from corresponding positions in 32-bit
 * chunks. The results are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs2
 * from the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs1, and adds the 16-bit unsigned
 * integer in [15:0] of 32-bit chunks in Rs1 with the 16-bit unsigned integer in [15:0] of 32-bit chunks in
 * Rs2. The two results are first logically right-shifted by 1 bit and then written to [31:16] of 32-bit
 * chunks in Rd and [15:0] of 32-bit chunks in Rd.
 *
 * **Examples**:\n
 * ~~~
 * Please see `URADD16` and `URSUB16` instructions.
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:16] = (Rs1.W[x][31:16] - Rs2.W[x][31:16]) u>> 1;
 * Rd.W[x][15:0] = (Rs1.W[x][15:0] + Rs2.W[x][15:0]) u>> 1;
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_URSTSA16(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /*
     * Per 32-bit chunk, with a as Rs1 and b as Rs2:
     *   Rd[31:16] = (a[31:16] - b[31:16]) u>> 1
     *   Rd[15:0]  = (a[15:0]  + b[15:0])  u>> 1
     */
    __ASM volatile("urstsa16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.181. URSTSA16 ===== */

/* ===== Inline Function Start for 3.182. URSUB8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
 * \brief URSUB8 (SIMD 8-bit Unsigned Halving Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * URSUB8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit unsigned integer element subtractions simultaneously. The results are halved to
 * avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the 8-bit unsigned integer elements in Rs2 from the 8-bit
 * unsigned integer elements in Rs1. The results are first logically right-shifted by 1 bit and then
 * written to Rd.
 *
 * **Examples**:\n
 * ~~~
 * * Ra = 0x7F, Rb = 0x80, Rt = 0xFF
 * * Ra = 0x80, Rb = 0x7F, Rt = 0x00
 * * Ra = 0x80, Rb = 0x40, Rt = 0x20
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.B[x] = (Rs1.B[x] - Rs2.B[x]) u>> 1;
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_URSUB8(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* Per byte lane: Rd.B[x] = (a.B[x] - b.B[x]) u>> 1; a is Rs1, b is Rs2. */
    __ASM volatile("ursub8 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.182. URSUB8 ===== */

/* ===== Inline Function Start for 3.183. URSUB16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief URSUB16 (SIMD 16-bit Unsigned Halving Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * URSUB16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit unsigned integer element subtractions simultaneously. The results are halved to
 * avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit unsigned integer elements in Rs2 from the 16-bit
 * unsigned integer elements in Rs1. The results are first logically right-shifted by 1 bit and then
 * written to Rd.
 *
 * **Examples**:\n
 * ~~~
 * * Ra = 0x7FFF, Rb = 0x8000, Rt = 0xFFFF
 * * Ra = 0x8000, Rb = 0x7FFF, Rt = 0x0000
 * * Ra = 0x8000, Rb = 0x4000, Rt = 0x2000
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = (Rs1.H[x] - Rs2.H[x]) u>> 1;
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_URSUB16(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* Per halfword lane: Rd.H[x] = (a.H[x] - b.H[x]) u>> 1; a is Rs1, b is Rs2. */
    __ASM volatile("ursub16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.183. URSUB16 ===== */

/* ===== Inline Function Start for 3.184. URSUB64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
 * \brief URSUB64 (64-bit Unsigned Halving Subtraction)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * URSUB64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Perform a 64-bit unsigned integer subtraction. The result is halved to avoid overflow or
 * saturation.
 *
 * **RV32 Description**:\n
 * This instruction subtracts the 64-bit unsigned integer of an even/odd pair of
 * registers specified by Rs2(4,1) from the 64-bit unsigned integer of an even/odd pair of registers
 * specified by Rs1(4,1). The subtraction result is first logically right-shifted by 1 bit and then written
 * to an even/odd pair of registers specified by Rd(4,1).
 * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
 * of the pair contains the low 32-bit of the result.
 *
 * **RV64 Description**:\n
 * This instruction subtracts the 64-bit unsigned integer in Rs2 from the 64-bit
 * unsigned integer in Rs1. The subtraction result is first logically right-shifted by 1 bit and then
 * written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * t_L = CONCAT(Rt(4,1),1'b0); t_H = CONCAT(Rt(4,1),1'b1);
 * a_L = CONCAT(Ra(4,1),1'b0); a_H = CONCAT(Ra(4,1),1'b1);
 * b_L = CONCAT(Rb(4,1),1'b0); b_H = CONCAT(Rb(4,1),1'b1);
 * R[t_H].R[t_L] = (R[a_H].R[a_L] - R[b_H].R[b_L]) u>> 1;
 * * RV64:
 * Rd = (Rs1 - Rs2) u>> 1;
 * ~~~
 *
 * \param [in]  a    unsigned long long type of value stored in a
 * \param [in]  b    unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_URSUB64(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;

    /* 64-bit Rd = (a - b) u>> 1; a is Rs1, b is Rs2. */
    __ASM volatile("ursub64 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.184. URSUB64 ===== */

/* ===== Inline Function Start for 3.185. URSUBW ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
 * \brief URSUBW (32-bit Unsigned Halving Subtraction)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * URSUBW Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Subtract 32-bit unsigned integers and the result is halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the first 32-bit unsigned integer in Rs2 from the first 32-bit
 * unsigned integer in Rs1. The result is first logically right-shifted by 1 bit and then sign-extended and
 * written to Rd.
 *
 * **Examples**:\n
 * ~~~
 * * Ra = 0x7FFFFFFF, Rb = 0x80000000, Rt = 0xFFFFFFFF
 * * Ra = 0x80000000, Rb = 0x7FFFFFFF, Rt = 0x00000000
 * * Ra = 0x80000000, Rb = 0x40000000, Rt = 0x20000000
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * Rd[31:0] = (Rs1[31:0] - Rs2[31:0]) u>> 1;
 * * RV64:
 * resw[31:0] = (Rs1[31:0] - Rs2[31:0]) u>> 1;
 * Rd[63:0] = SE(resw[31:0]);
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_URSUBW(unsigned int a, unsigned int b)
{
    unsigned long rd;

    /* Rd = (a[31:0] - b[31:0]) u>> 1; a is Rs1, b is Rs2. */
    __ASM volatile("ursubw %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.185. URSUBW ===== */

/* ===== Inline Function Start for 3.186. WEXTI ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
 * \brief WEXTI (Extract Word from 64-bit Immediate)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * WEXTI Rd, Rs1, #LSBloc
 * ~~~
 *
 * **Purpose**:\n
 * Extract a 32-bit word from a 64-bit value stored in an even/odd pair of registers (RV32) or
 * a register (RV64) starting from a specified immediate LSB bit position.
 *
 * **RV32 Description**:\n
 * This instruction extracts a 32-bit word from a 64-bit value of an even/odd pair of registers specified
 * by Rs1(4,1) starting from a specified immediate LSB bit position, #LSBloc. The extracted word is
 * written to Rd.
 * Rs1(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register
 * pair includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the 64-bit value and the even `2d`
 * register of the pair contains the low 32-bit of the 64-bit value.
 *
 * **RV64 Description**:\n
 * This instruction extracts a 32-bit word from a 64-bit value in Rs1 starting from a specified
 * immediate LSB bit position, #LSBloc. The extracted word is sign-extended and written to lower 32-
 * bit of Rd.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * Idx0 = CONCAT(Rs1(4,1),1'b0); Idx1 = CONCAT(Rs1(4,1),1'b1);
 * src[63:0] = Concat(R[Idx1], R[Idx0]);
 * Rd = src[31+LSBloc:LSBloc];
 * * RV64:
 * ExtractW = Rs1[31+LSBloc:LSBloc];
 * Rd = SE(ExtractW)
 * ~~~
 *
 * \param [in]  a    long long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
#define __RV_WEXTI(a, b)    \
    ({    \
        /* Evaluate `a` before any other local is declared and use */    \
        /* macro-specific names, so that a caller argument spelled */    \
        /* `result` (or `__a`) is not captured by this block's locals. */    \
        long long __rv_wexti_src = (long long)(a);    \
        unsigned long __rv_wexti_rd;    \
        /* `b` goes through the "K" immediate constraint, so it must be */    \
        /* a compile-time constant; it is encoded into the instruction. */    \
        __ASM volatile("wexti %0, %1, %2" : "=r"(__rv_wexti_rd) : "r"(__rv_wexti_src), "K"(b));    \
        __rv_wexti_rd;    \
    })
/* ===== Inline Function End for 3.186. WEXTI ===== */

/* ===== Inline Function Start for 3.187. WEXT ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
 * \brief WEXT (Extract Word from 64-bit)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * WEXT Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Extract a 32-bit word from a 64-bit value stored in an even/odd pair of registers (RV32) or
 * a register (RV64) starting from a specified LSB bit position in a register.
 *
 * **RV32 Description**:\n
 * This instruction extracts a 32-bit word from a 64-bit value of an even/odd pair of registers specified
 * by Rs1(4,1) starting from a specified LSB bit position, specified in Rs2[4:0]. The extracted word is
 * written to Rd.
 * Rs1(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register
 * pair includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the 64-bit value and the even `2d`
 * register of the pair contains the low 32-bit of the 64-bit value.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * Idx0 = CONCAT(Rs1(4,1),1'b0); Idx1 = CONCAT(Rs1(4,1),1'b1);
 * src[63:0] = Concat(R[Idx1], R[Idx0]);
 * LSBloc = Rs2[4:0];
 * Rd = src[31+LSBloc:LSBloc];
 * * RV64:
 * LSBloc = Rs2[4:0];
 * ExtractW = Rs1[31+LSBloc:LSBloc];
 * Rd = SE(ExtractW)
 * ~~~
 *
 * \param [in]  a    long long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_WEXT(long long a, unsigned int b)
{
    unsigned long rd;

    /* a is the 64-bit source (Rs1); b is Rs2 and supplies the LSB position. */
    __ASM volatile("wext %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.187. WEXT ===== */

/* ===== Inline Function Start for 3.188.1. ZUNPKD810 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
 * \brief ZUNPKD810 (Unsigned Unpacking Bytes 1 & 0)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * ZUNPKD8xy Rd, Rs1
 * xy = {10, 20, 30, 31, 32}
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte x and byte y of 32-bit chunks in a register into two 16-bit unsigned
 * halfwords of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `ZUNPKD8xy` instruction, it unpacks byte *x* and byte *y* of 32-bit chunks in Rs1 into
 * two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit
 * chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[x])
 * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[y])
 * // ZUNPKD810, x=1,y=0
 * // ZUNPKD820, x=2,y=0
 * // ZUNPKD830, x=3,y=0
 * // ZUNPKD831, x=3,y=1
 * // ZUNPKD832, x=3,y=2
 * for RV32: m=0,
 * for RV64: m=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_ZUNPKD810(unsigned long a)
{
    unsigned long rd;

    /* Per 32-bit chunk: Rd.H[1] = ZE16(a.B[1]), Rd.H[0] = ZE16(a.B[0]). */
    __ASM volatile("zunpkd810 %0, %1"
                   : "=r"(rd)
                   : "r"(a));
    return rd;
}
/* ===== Inline Function End for 3.188.1. ZUNPKD810 ===== */

/* ===== Inline Function Start for 3.188.2. ZUNPKD820 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
 * \brief ZUNPKD820 (Unsigned Unpacking Bytes 2 & 0)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * ZUNPKD8xy Rd, Rs1
 * xy = {10, 20, 30, 31, 32}
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte x and byte y of 32-bit chunks in a register into two 16-bit unsigned
 * halfwords of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `ZUNPKD8xy` instruction, it unpacks byte *x* and byte *y* of 32-bit chunks in Rs1 into
 * two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit
 * chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[x])
 * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[y])
 * // ZUNPKD810, x=1,y=0
 * // ZUNPKD820, x=2,y=0
 * // ZUNPKD830, x=3,y=0
 * // ZUNPKD831, x=3,y=1
 * // ZUNPKD832, x=3,y=2
 * for RV32: m=0,
 * for RV64: m=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_ZUNPKD820(unsigned long a)
{
    unsigned long rd;

    /* Per 32-bit chunk: Rd.H[1] = ZE16(a.B[2]), Rd.H[0] = ZE16(a.B[0]). */
    __ASM volatile("zunpkd820 %0, %1"
                   : "=r"(rd)
                   : "r"(a));
    return rd;
}
/* ===== Inline Function End for 3.188.2. ZUNPKD820 ===== */

/* ===== Inline Function Start for 3.188.3. ZUNPKD830 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
 * \brief ZUNPKD830 (Unsigned Unpacking Bytes 3 & 0)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * ZUNPKD8xy Rd, Rs1
 * xy = {10, 20, 30, 31, 32}
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte x and byte y of 32-bit chunks in a register into two 16-bit unsigned
 * halfwords of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `ZUNPKD8xy` instruction, it unpacks byte *x* and byte *y* of 32-bit chunks in Rs1 into
 * two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit
 * chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[x])
 * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[y])
 * // ZUNPKD810, x=1,y=0
 * // ZUNPKD820, x=2,y=0
 * // ZUNPKD830, x=3,y=0
 * // ZUNPKD831, x=3,y=1
 * // ZUNPKD832, x=3,y=2
 * for RV32: m=0,
 * for RV64: m=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_ZUNPKD830(unsigned long a)
{
    unsigned long rd;

    /* Per 32-bit chunk: Rd.H[1] = ZE16(a.B[3]), Rd.H[0] = ZE16(a.B[0]). */
    __ASM volatile("zunpkd830 %0, %1"
                   : "=r"(rd)
                   : "r"(a));
    return rd;
}
/* ===== Inline Function End for 3.188.3. ZUNPKD830 ===== */

/* ===== Inline Function Start for 3.188.4. ZUNPKD831 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
 * \brief ZUNPKD831 (Unsigned Unpacking Bytes 3 & 1)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * ZUNPKD8xy Rd, Rs1
 * xy = {10, 20, 30, 31, 32}
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte x and byte y of 32-bit chunks in a register into two 16-bit unsigned
 * halfwords of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `ZUNPKD8xy` instruction, it unpacks byte *x* and byte *y* of 32-bit chunks in Rs1 into
 * two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit
 * chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[x])
 * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[y])
 * // ZUNPKD810, x=1,y=0
 * // ZUNPKD820, x=2,y=0
 * // ZUNPKD830, x=3,y=0
 * // ZUNPKD831, x=3,y=1
 * // ZUNPKD832, x=3,y=2
 * for RV32: m=0,
 * for RV64: m=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_ZUNPKD831(unsigned long a)
{
    unsigned long rd;

    /* Per 32-bit chunk: Rd.H[1] = ZE16(a.B[3]), Rd.H[0] = ZE16(a.B[1]). */
    __ASM volatile("zunpkd831 %0, %1"
                   : "=r"(rd)
                   : "r"(a));
    return rd;
}
/* ===== Inline Function End for 3.188.4. ZUNPKD831 ===== */

/* ===== Inline Function Start for 3.188.5. ZUNPKD832 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
 * \brief ZUNPKD832 (Unsigned Unpacking Bytes 3 & 2)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * ZUNPKD8xy Rd, Rs1
 * xy = {10, 20, 30, 31, 32}
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte x and byte y of 32-bit chunks in a register into two 16-bit unsigned
 * halfwords of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `ZUNPKD8xy` instruction, it unpacks byte *x* and byte *y* of 32-bit chunks in Rs1 into
 * two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit
 * chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[x])
 * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[y])
 * // ZUNPKD810, x=1,y=0
 * // ZUNPKD820, x=2,y=0
 * // ZUNPKD830, x=3,y=0
 * // ZUNPKD831, x=3,y=1
 * // ZUNPKD832, x=3,y=2
 * for RV32: m=0,
 * for RV64: m=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_ZUNPKD832(unsigned long a)
{
    unsigned long rd;

    /* Per 32-bit chunk: Rd.H[1] = ZE16(a.B[3]), Rd.H[0] = ZE16(a.B[2]). */
    __ASM volatile("zunpkd832 %0, %1"
                   : "=r"(rd)
                   : "r"(a));
    return rd;
}
/* ===== Inline Function End for 3.188.5. ZUNPKD832 ===== */

#if (__RISCV_XLEN == 64) || defined(__ONLY_FOR_DOXYGEN_DOCUMENT_GENERATION__)

/* ===== Inline Function Start for 4.1. ADD32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief ADD32 (SIMD 32-bit Addition)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * ADD32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit integer element additions simultaneously.
 *
 * **Description**:\n
 * This instruction adds the 32-bit integer elements in Rs1 with the 32-bit integer
 * elements in Rs2, and then writes the 32-bit element results to Rd.
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned addition.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = Rs1.W[x] + Rs2.W[x];
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_ADD32(unsigned long a, unsigned long b)
{
    unsigned long sum;

    /* Lane-wise 32-bit addition: Rd.W[x] = Rs1.W[x] + Rs2.W[x]. */
    __ASM volatile("add32 %0, %1, %2"
                   : "=r"(sum)
                   : "r"(a), "r"(b));

    return sum;
}
/* ===== Inline Function End for 4.1. ADD32 ===== */

/* ===== Inline Function Start for 4.2. CRAS32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief CRAS32 (SIMD 32-bit Cross Addition & Subtraction)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * CRAS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit integer element addition and 32-bit integer element subtraction in a 64-bit
 * chunk simultaneously. Operands are from crossed 32-bit elements.
 *
 * **Description**:\n
 * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit
 * integer element in [31:0] of Rs2, and writes the result to [63:32] of Rd; at the same time, it subtracts
 * the 32-bit integer element in [63:32] of Rs2 from the 32-bit integer element in [31:0] of Rs1, and
 * writes the result to [31:0] of Rd.
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned operations.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[1] = Rs1.W[1] + Rs2.W[0];
 * Rd.W[0] = Rs1.W[0] - Rs2.W[1];
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_CRAS32(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* Crossed operands: upper word = a.W[1] + b.W[0],
     *                   lower word = a.W[0] - b.W[1]. */
    __ASM volatile("cras32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 4.2. CRAS32 ===== */

/* ===== Inline Function Start for 4.3. CRSA32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief CRSA32 (SIMD 32-bit Cross Subtraction & Addition)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * CRSA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit integer element subtraction and 32-bit integer element addition in a 64-bit
 * chunk simultaneously. Operands are from crossed 32-bit elements.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit integer element in [31:0] of Rs2 from the 32-bit integer element
 * in [63:32] of Rs1, and writes the result to [63:32] of Rd; at the same time, it adds the 32-bit integer
 * element in [31:0] of Rs1 with the 32-bit integer element in [63:32] of Rs2, and writes the result to
 * [31:0] of Rd.
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned operations.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[1] = Rs1.W[1] - Rs2.W[0];
 * Rd.W[0] = Rs1.W[0] + Rs2.W[1];
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_CRSA32(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* Crossed operands: upper word = a.W[1] - b.W[0],
     *                   lower word = a.W[0] + b.W[1]. */
    __ASM volatile("crsa32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 4.3. CRSA32 ===== */

/* ===== Inline Function Start for 4.4. KABS32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC
 * \brief KABS32 (Scalar 32-bit Absolute Value with Saturation)
 * \details
 * **Type**: DSP (RV64 Only)
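 *
 * **Format** (bits [24:0]; the [31:25] field is not given here):\n
 * ~~~
 * | 24    20 | 19    15 | 14    12 | 11     7 | 6       0 |
 * |  KABS32  |   Rs1    |   000    |    Rd    |   GE80B   |
 * |  10010   |          |          |          |  1111111  |
 * ~~~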
 *
 * **Syntax**:\n
 * ~~~
 * KABS32 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Get the absolute value of signed 32-bit integer elements in a general register.
 *
 * **Description**:\n
 * This instruction calculates the absolute value of signed 32-bit integer elements stored
 * in Rs1. The results are written to Rd. This instruction with the minimum negative integer input of
 * 0x80000000 will produce a saturated output of maximum positive integer of 0x7fffffff and the OV
 * flag will be set to 1.
 *
 * **Operations**:\n
 * ~~~
 * if (Rs1.W[x] >= 0) {
 *   res[x] = Rs1.W[x];
 * } else {
 *   If (Rs1.W[x] == 0x80000000) {
 *     res[x] = 0x7fffffff;
 *     OV = 1;
 *   } else {
 *     res[x] = -Rs1.W[x];
 *   }
 * }
 * Rd.W[x] = res[x];
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KABS32(unsigned long a)
{
    unsigned long magnitude;

    /* Per-word saturating absolute value: an input word of 0x80000000
     * yields 0x7fffffff and sets the OV flag. */
    __ASM volatile("kabs32 %0, %1"
                   : "=r"(magnitude)
                   : "r"(a));

    return magnitude;
}
/* ===== Inline Function End for 4.4. KABS32 ===== */

/* ===== Inline Function Start for 4.5. KADD32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief KADD32 (SIMD 32-bit Signed Saturating Addition)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KADD32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer element saturating additions simultaneously.
 *
 * **Description**:\n
 * This instruction adds the 32-bit signed integer elements in Rs1 with the 32-bit signed
 * integer elements in Rs2. If any of the results are beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1),
 * they are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rs1.W[x] + Rs2.W[x];
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KADD32(unsigned long a, unsigned long b)
{
    unsigned long sat_sum;

    /* Lane-wise signed 32-bit addition, clamped to the Q31 range;
     * OV is set when a lane saturates. */
    __ASM volatile("kadd32 %0, %1, %2"
                   : "=r"(sat_sum)
                   : "r"(a), "r"(b));

    return sat_sum;
}
/* ===== Inline Function End for 4.5. KADD32 ===== */

/* ===== Inline Function Start for 4.6. KCRAS32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief KCRAS32 (SIMD 32-bit Signed Saturating Cross Addition & Subtraction)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KCRAS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer element saturating addition and 32-bit signed integer element
 * saturating subtraction in a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements.
 *
 * **Description**:\n
 * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit
 * integer element in [31:0] of Rs2; at the same time, it subtracts the 32-bit integer element in [63:32] of
 * Rs2 from the 32-bit integer element in [31:0] of Rs1. If any of the results are beyond the Q31 number
 * range (-2^31 <= Q31 <= 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated
 * results are written to [63:32] of Rd for addition and [31:0] of Rd for subtraction.
 *
 * **Operations**:\n
 * ~~~
 * res[1] = Rs1.W[1] + Rs2.W[0];
 * res[0] = Rs1.W[0] - Rs2.W[1];
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[1] = res[1];
 * Rd.W[0] = res[0];
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KCRAS32(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* Saturating crossed add/sub: upper word = sat(a.W[1] + b.W[0]),
     *                             lower word = sat(a.W[0] - b.W[1]).
     * OV is set when either word saturates. */
    __ASM volatile("kcras32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 4.6. KCRAS32 ===== */

/* ===== Inline Function Start for 4.7. KCRSA32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief KCRSA32 (SIMD 32-bit Signed Saturating Cross Subtraction & Addition)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KCRSA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer element saturating subtraction and 32-bit signed integer element
 * saturating addition in a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit integer element in [31:0] of Rs2 from the 32-bit integer element
 * in [63:32] of Rs1; at the same time, it adds the 32-bit integer element in [31:0] of Rs1 with the 32-bit
 * integer element in [63:32] of Rs2. If any of the results are beyond the Q31 number range (-2^31 <= Q31
 * <= 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to
 * [63:32] of Rd for subtraction and [31:0] of Rd for addition.
 *
 * **Operations**:\n
 * ~~~
 * res[1] = Rs1.W[1] - Rs2.W[0];
 * res[0] = Rs1.W[0] + Rs2.W[1];
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[1] = res[1];
 * Rd.W[0] = res[0];
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KCRSA32(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* Saturating crossed sub/add: upper word = sat(a.W[1] - b.W[0]),
     *                             lower word = sat(a.W[0] + b.W[1]).
     * OV is set when either word saturates. */
    __ASM volatile("kcrsa32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 4.7. KCRSA32 ===== */

/* ===== Inline Function Start for 4.8.1. KDMBB16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
 * \brief KDMBB16 (SIMD Signed Saturating Double Multiply B16 x B16)
 * \details
 * **Type**: SIMD (RV64 only)
 *
 * **Syntax**:\n
 * ~~~
 * KDMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
 * of the 32-bit chunks in registers and then double and saturate the Q31 results into the 32-bit chunks
 * in the destination register. If saturation happens, an overflow flag OV will be set.
 *
 * **Description**:\n
 * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
 * or bottom 16-bit Q15 content of the 32-bit portions in Rs2. The Q30 results are then doubled and
 * saturated into Q31 values. The Q31 values are then written into the 32-bit chunks in Rd. When both
 * the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated to 0x7FFFFFFF
 * and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * // KDMBB16: (x,y,z)=(0,0,0),(2,2,1)
 * // KDMBT16: (x,y,z)=(0,1,0),(2,3,1)
 * // KDMTT16: (x,y,z)=(1,1,0),(3,3,1)
 * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y];
 * If (0x8000 != aop[z] | 0x8000 != bop[z]) {
 *   Mresult[z] = aop[z] * bop[z];
 *   resQ31[z] = Mresult[z] << 1;
 * } else {
 *   resQ31[z] = 0x7FFFFFFF;
 *   OV = 1;
 * }
 * Rd.W[z] = resQ31[z];
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KDMBB16(unsigned long a, unsigned long b)
{
    unsigned long q31_pair;

    /* Per 32-bit chunk: bottom halfword of `a` times bottom halfword of `b`,
     * doubled and saturated to Q31 (0x8000 * 0x8000 gives 0x7FFFFFFF, OV set). */
    __ASM volatile("kdmbb16 %0, %1, %2"
                   : "=r"(q31_pair)
                   : "r"(a), "r"(b));

    return q31_pair;
}
/* ===== Inline Function End for 4.8.1. KDMBB16 ===== */

/* ===== Inline Function Start for 4.8.2. KDMBT16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
 * \brief KDMBT16 (SIMD Signed Saturating Double Multiply B16 x T16)
 * \details
 * **Type**: SIMD (RV64 only)
 *
 * **Syntax**:\n
 * ~~~
 * KDMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
 * of the 32-bit chunks in registers and then double and saturate the Q31 results into the 32-bit chunks
 * in the destination register. If saturation happens, an overflow flag OV will be set.
 *
 * **Description**:\n
 * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
 * or bottom 16-bit Q15 content of the 32-bit portions in Rs2. The Q30 results are then doubled and
 * saturated into Q31 values. The Q31 values are then written into the 32-bit chunks in Rd. When both
 * the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated to 0x7FFFFFFF
 * and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * // KDMBB16: (x,y,z)=(0,0,0),(2,2,1)
 * // KDMBT16: (x,y,z)=(0,1,0),(2,3,1)
 * // KDMTT16: (x,y,z)=(1,1,0),(3,3,1)
 * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y];
 * If (0x8000 != aop[z] | 0x8000 != bop[z]) {
 *   Mresult[z] = aop[z] * bop[z];
 *   resQ31[z] = Mresult[z] << 1;
 * } else {
 *   resQ31[z] = 0x7FFFFFFF;
 *   OV = 1;
 * }
 * Rd.W[z] = resQ31[z];
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KDMBT16(unsigned long a, unsigned long b)
{
    unsigned long q31_pair;

    /* Per 32-bit chunk: bottom halfword of `a` times top halfword of `b`,
     * doubled and saturated to Q31 (0x8000 * 0x8000 gives 0x7FFFFFFF, OV set). */
    __ASM volatile("kdmbt16 %0, %1, %2"
                   : "=r"(q31_pair)
                   : "r"(a), "r"(b));

    return q31_pair;
}
/* ===== Inline Function End for 4.8.2. KDMBT16 ===== */

/* ===== Inline Function Start for 4.8.3. KDMTT16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
 * \brief KDMTT16 (SIMD Signed Saturating Double Multiply T16 x T16)
 * \details
 * **Type**: SIMD (RV64 only)
 *
 * **Syntax**:\n
 * ~~~
 * KDMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
 * of the 32-bit chunks in registers and then double and saturate the Q31 results into the 32-bit chunks
 * in the destination register. If saturation happens, an overflow flag OV will be set.
 *
 * **Description**:\n
 * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
 * or bottom 16-bit Q15 content of the 32-bit portions in Rs2. The Q30 results are then doubled and
 * saturated into Q31 values. The Q31 values are then written into the 32-bit chunks in Rd. When both
 * the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated to 0x7FFFFFFF
 * and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * // KDMBB16: (x,y,z)=(0,0,0),(2,2,1)
 * // KDMBT16: (x,y,z)=(0,1,0),(2,3,1)
 * // KDMTT16: (x,y,z)=(1,1,0),(3,3,1)
 * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y];
 * If (0x8000 != aop[z] | 0x8000 != bop[z]) {
 *   Mresult[z] = aop[z] * bop[z];
 *   resQ31[z] = Mresult[z] << 1;
 * } else {
 *   resQ31[z] = 0x7FFFFFFF;
 *   OV = 1;
 * }
 * Rd.W[z] = resQ31[z];
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KDMTT16(unsigned long a, unsigned long b)
{
    unsigned long q31_pair;

    /* Per 32-bit chunk: top halfword of `a` times top halfword of `b`,
     * doubled and saturated to Q31 (0x8000 * 0x8000 gives 0x7FFFFFFF, OV set). */
    __ASM volatile("kdmtt16 %0, %1, %2"
                   : "=r"(q31_pair)
                   : "r"(a), "r"(b));

    return q31_pair;
}
/* ===== Inline Function End for 4.8.3. KDMTT16 ===== */

/* ===== Inline Function Start for 4.9.1. KDMABB16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
 * \brief KDMABB16 (SIMD Signed Saturating Double Multiply Addition B16 x B16)
 * \details
 * **Type**: SIMD (RV64 only)
 *
 * **Syntax**:\n
 * ~~~
 * KDMAxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
 * of the 32-bit chunks in registers and then double and saturate the Q31 results, add the results with
 * the values of the corresponding 32-bit chunks from the destination register and write the saturated
 * addition results back into the corresponding 32-bit chunks of the destination register. If saturation
 * happens, an overflow flag OV will be set.
 *
 * **Description**:\n
 * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
 * or bottom 16-bit Q15 content of the corresponding 32-bit portions in Rs2. The Q30 results are then
 * doubled and saturated into Q31 values. The Q31 values are then added with the content of the
 * corresponding 32-bit portions of Rd. If the addition results are beyond the Q31 number range (-2^31 <=
 * Q31 <= 2^31-1), they are saturated to the range and the OV flag is set to 1. The results after saturation
 * are written back to Rd.
 * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be
 * set.
 *
 * **Operations**:\n
 * ~~~
 * // KDMABB16: (x,y,z)=(0,0,0),(2,2,1)
 * // KDMABT16: (x,y,z)=(0,1,0),(2,3,1)
 * // KDMATT16: (x,y,z)=(1,1,0),(3,3,1)
 * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y];
 * If (0x8000 != aop[z] | 0x8000 != bop[z]) {
 *   Mresult[z] = aop[z] * bop[z];
 *   resQ31[z] = Mresult[z] << 1;
 * } else {
 *   resQ31[z] = 0x7FFFFFFF;
 *   OV = 1;
 * }
 * resadd[z] = Rd.W[z] + resQ31[z];
 * if (resadd[z] > (2^31)-1) {
 *   resadd[z] = (2^31)-1;
 *   OV = 1;
 * } else if (resadd[z] < -2^31) {
 *   resadd[z] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[z] = resadd[z];
 * ~~~
 *
 * \param [in]  t    unsigned long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KDMABB16(unsigned long t, unsigned long a, unsigned long b)
{
    /* Rd is both the addend and the destination, so the accumulator is
     * bound as a read-write ("+r") operand. */
    unsigned long acc = t;

    /* Per 32-bit chunk: acc.W[z] = sat(acc.W[z] + sat(2 * a.bottom16 * b.bottom16)). */
    __ASM volatile("kdmabb16 %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));

    return acc;
}
/* ===== Inline Function End for 4.9.1. KDMABB16 ===== */

/* ===== Inline Function Start for 4.9.2. KDMABT16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
 * \brief KDMABT16 (SIMD Signed Saturating Double Multiply Addition B16 x T16)
 * \details
 * **Type**: SIMD (RV64 only)
 *
 * **Syntax**:\n
 * ~~~
 * KDMAxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
 * of the 32-bit chunks in registers and then double and saturate the Q31 results, add the results with
 * the values of the corresponding 32-bit chunks from the destination register and write the saturated
 * addition results back into the corresponding 32-bit chunks of the destination register. If saturation
 * happens, an overflow flag OV will be set.
 *
 * **Description**:\n
 * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
 * or bottom 16-bit Q15 content of the corresponding 32-bit portions in Rs2. The Q30 results are then
 * doubled and saturated into Q31 values. The Q31 values are then added with the content of the
 * corresponding 32-bit portions of Rd. If the addition results are beyond the Q31 number range (-2^31 <=
 * Q31 <= 2^31-1), they are saturated to the range and the OV flag is set to 1. The results after saturation
 * are written back to Rd.
 * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be
 * set.
 *
 * **Operations**:\n
 * ~~~
 * // KDMABB16: (x,y,z)=(0,0,0),(2,2,1)
 * // KDMABT16: (x,y,z)=(0,1,0),(2,3,1)
 * // KDMATT16: (x,y,z)=(1,1,0),(3,3,1)
 * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y];
 * If (0x8000 != aop[z] | 0x8000 != bop[z]) {
 *   Mresult[z] = aop[z] * bop[z];
 *   resQ31[z] = Mresult[z] << 1;
 * } else {
 *   resQ31[z] = 0x7FFFFFFF;
 *   OV = 1;
 * }
 * resadd[z] = Rd.W[z] + resQ31[z];
 * if (resadd[z] > (2^31)-1) {
 *   resadd[z] = (2^31)-1;
 *   OV = 1;
 * } else if (resadd[z] < -2^31) {
 *   resadd[z] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[z] = resadd[z];
 * ~~~
 *
 * \param [in]  t    unsigned long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KDMABT16(unsigned long t, unsigned long a, unsigned long b)
{
    /* Rd is both the addend and the destination, so the accumulator is
     * bound as a read-write ("+r") operand. */
    unsigned long acc = t;

    /* Per 32-bit chunk: acc.W[z] = sat(acc.W[z] + sat(2 * a.bottom16 * b.top16)). */
    __ASM volatile("kdmabt16 %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));

    return acc;
}
/* ===== Inline Function End for 4.9.2. KDMABT16 ===== */

/* ===== Inline Function Start for 4.9.3. KDMATT16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
 * \brief KDMATT16 (SIMD Signed Saturating Double Multiply Addition T16 x T16)
 * \details
 * **Type**: SIMD (RV64 only)
 *
 * **Syntax**:\n
 * ~~~
 * KDMAxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
 * of the 32-bit chunks in registers and then double and saturate the Q31 results, add the results with
 * the values of the corresponding 32-bit chunks from the destination register and write the saturated
 * addition results back into the corresponding 32-bit chunks of the destination register. If saturation
 * happens, an overflow flag OV will be set.
 *
 * **Description**:\n
 * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
 * or bottom 16-bit Q15 content of the corresponding 32-bit portions in Rs2. The Q30 results are then
 * doubled and saturated into Q31 values. The Q31 values are then added with the content of the
 * corresponding 32-bit portions of Rd. If the addition results are beyond the Q31 number range (-2^31 <=
 * Q31 <= 2^31-1), they are saturated to the range and the OV flag is set to 1. The results after saturation
 * are written back to Rd.
 * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be
 * set.
 *
 * **Operations**:\n
 * ~~~
 * // KDMABB16: (x,y,z)=(0,0,0),(2,2,1)
 * // KDMABT16: (x,y,z)=(0,1,0),(2,3,1)
 * // KDMATT16: (x,y,z)=(1,1,0),(3,3,1)
 * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y];
 * If (0x8000 != aop[z] | 0x8000 != bop[z]) {
 *   Mresult[z] = aop[z] * bop[z];
 *   resQ31[z] = Mresult[z] << 1;
 * } else {
 *   resQ31[z] = 0x7FFFFFFF;
 *   OV = 1;
 * }
 * resadd[z] = Rd.W[z] + resQ31[z];
 * if (resadd[z] > (2^31)-1) {
 *   resadd[z] = (2^31)-1;
 *   OV = 1;
 * } else if (resadd[z] < -2^31) {
 *   resadd[z] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[z] = resadd[z];
 * ~~~
 *
 * \param [in]  t    unsigned long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KDMATT16(unsigned long t, unsigned long a, unsigned long b)
{
    /* Rd is both the addend and the destination, so the accumulator is
     * bound as a read-write ("+r") operand. */
    unsigned long acc = t;

    /* Per 32-bit chunk: acc.W[z] = sat(acc.W[z] + sat(2 * a.top16 * b.top16)). */
    __ASM volatile("kdmatt16 %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));

    return acc;
}
/* ===== Inline Function End for 4.9.3. KDMATT16 ===== */

/* ===== Inline Function Start for 4.10.1. KHMBB16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
 * \brief KHMBB16 (SIMD Signed Saturating Half Multiply B16 x B16)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KHMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
 * of the 32-bit chunks in registers and then right-shift 15 bits to turn the Q30 results into Q15
 * numbers again and saturate the Q15 results into the destination register. If saturation happens, an
 * overflow flag OV will be set.
 *
 * **Description**:\n
 * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
 * or bottom 16-bit Q15 content of the 32-bit portion in Rs2. The Q30 results are then right-shifted
 * 15 bits and saturated into Q15 values. The 32-bit Q15 values are then written into the 32-bit chunks in
 * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated
 * to 0x7FFF and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * // KHMBB16: (x,y,z)=(0,0,0),(2,2,1)
 * // KHMBT16: (x,y,z)=(0,1,0),(2,3,1)
 * // KHMTT16: (x,y,z)=(1,1,0),(3,3,1)
 * aop = Rs1.H[x]; bop = Rs2.H[y];
 * If (0x8000 != aop | 0x8000 != bop) {
 *   Mresult[31:0] = aop * bop;
 *   res[15:0] = Mresult[30:15];
 * } else {
 *   res[15:0] = 0x7FFF;
 *   OV = 1;
 * }
 * Rd.W[z] = SE32(res[15:0]);
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KHMBB16(unsigned long a, unsigned long b)
{
    unsigned long q15_pair;

    /* Per 32-bit chunk: bottom halfword of `a` times bottom halfword of `b`,
     * shifted back to Q15, saturated, then sign-extended to 32 bits. */
    __ASM volatile("khmbb16 %0, %1, %2"
                   : "=r"(q15_pair)
                   : "r"(a), "r"(b));

    return q15_pair;
}
/* ===== Inline Function End for 4.10.1. KHMBB16 ===== */

/* ===== Inline Function Start for 4.10.2. KHMBT16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
 * \brief KHMBT16 (SIMD Signed Saturating Half Multiply B16 x T16)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KHMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
 * of the 32-bit chunks in registers and then right-shift 15 bits to turn the Q30 results into Q15
 * numbers again and saturate the Q15 results into the destination register. If saturation happens, an
 * overflow flag OV will be set.
 *
 * **Description**:\n
 * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
 * or bottom 16-bit Q15 content of the 32-bit portion in Rs2. The Q30 results are then right-shifted
 * 15 bits and saturated into Q15 values. The 32-bit Q15 values are then written into the 32-bit chunks in
 * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated
 * to 0x7FFF and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * // KHMBB16: (x,y,z)=(0,0,0),(2,2,1)
 * // KHMBT16: (x,y,z)=(0,1,0),(2,3,1)
 * // KHMTT16: (x,y,z)=(1,1,0),(3,3,1)
 * aop = Rs1.H[x]; bop = Rs2.H[y];
 * If (0x8000 != aop | 0x8000 != bop) {
 *   Mresult[31:0] = aop * bop;
 *   res[15:0] = Mresult[30:15];
 * } else {
 *   res[15:0] = 0x7FFF;
 *   OV = 1;
 * }
 * Rd.W[z] = SE32(res[15:0]);
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KHMBT16(unsigned long a, unsigned long b)
{
    unsigned long q15_pair;

    /* Per 32-bit chunk: bottom halfword of `a` times top halfword of `b`,
     * shifted back to Q15, saturated, then sign-extended to 32 bits. */
    __ASM volatile("khmbt16 %0, %1, %2"
                   : "=r"(q15_pair)
                   : "r"(a), "r"(b));

    return q15_pair;
}
/* ===== Inline Function End for 4.10.2. KHMBT16 ===== */

/* ===== Inline Function Start for 4.10.3. KHMTT16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
 * \brief KHMTT16 (SIMD Signed Saturating Half Multiply T16 x T16)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KHMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
 * of the 32-bit chunks in registers and then right-shift 15 bits to turn the Q30 results into Q15
 * numbers again and saturate the Q15 results into the destination register. If saturation happens, an
 * overflow flag OV will be set.
 *
 * **Description**:\n
 * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
 * or bottom 16-bit Q15 content of the 32-bit portion in Rs2. The Q30 results are then right-shifted
 * 15 bits and saturated into Q15 values. The 32-bit Q15 values are then written into the 32-bit chunks in
 * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated
 * to 0x7FFF and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * // KHMBB16: (x,y,z)=(0,0,0),(2,2,1)
 * // KHMBT16: (x,y,z)=(0,1,0),(2,3,1)
 * // KHMTT16: (x,y,z)=(1,1,0),(3,3,1)
 * aop = Rs1.H[x]; bop = Rs2.H[y];
 * If (0x8000 != aop | 0x8000 != bop) {
 *   Mresult[31:0] = aop * bop;
 *   res[15:0] = Mresult[30:15];
 * } else {
 *   res[15:0] = 0x7FFF;
 *   OV = 1;
 * }
 * Rd.W[z] = SE32(res[15:0]);
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KHMTT16(unsigned long a, unsigned long b)
{
    unsigned long q15_pair;

    /* Per 32-bit chunk: top halfword of `a` times top halfword of `b`,
     * shifted back to Q15, saturated, then sign-extended to 32 bits. */
    __ASM volatile("khmtt16 %0, %1, %2"
                   : "=r"(q15_pair)
                   : "r"(a), "r"(b));

    return q15_pair;
}
/* ===== Inline Function End for 4.10.3. KHMTT16 ===== */

/* ===== Inline Function Start for 4.11.1. KMABB32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT_ADD
 * \brief KMABB32 (Saturating Signed Multiply Bottom Words & Add)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KMABB32 Rd, Rs1, Rs2
 * KMABT32 Rd, Rs1, Rs2
 * KMATT32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit element in a register with the 32-bit element in another register
 * and add the result to the content of 64-bit data in the third register. The addition result may be
 * saturated and is written to the third register.
 * * KMABB32: rd + bottom*bottom
 * * KMABT32: rd + bottom*top
 * * KMATT32: rd + top*top
 *
 * **Description**:\n
 * For the `KMABB32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit
 * element in Rs2.
 * For the `KMABT32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit
 * element in Rs2.
 * For the `KMATT32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit
 * element in Rs2.
 * The multiplication result is added to the content of 64-bit data in Rd. If the addition result is beyond
 * the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The
 * result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed
 * integers.
 *
 * **Operations**:\n
 * ~~~
 * res = Rd + (Rs1.W[0] * Rs2.W[0]); // KMABB32
 * res = Rd + (Rs1.W[0] * Rs2.W[1]); // KMABT32
 * res = Rd + (Rs1.W[1] * Rs2.W[1]); // KMATT32
 * if (res > (2^63)-1) {
 *   res = (2^63)-1;
 *   OV = 1;
 * } else if (res < -2^63) {
 *   res = -2^63;
 *   OV = 1;
 * }
 * Rd = res;
 * ~~~
 *
 * **Exceptions**: None
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMABB32(long t, unsigned long a, unsigned long b)
{
    /* Rd carries the 64-bit addend in and the result out, hence the
     * read-write ("+r") binding. */
    long acc = t;

    /* acc = sat64(acc + a.W[0] * b.W[0]); OV is set on saturation. */
    __ASM volatile("kmabb32 %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));

    return acc;
}
/* ===== Inline Function End for 4.11.1. KMABB32 ===== */

/* ===== Inline Function Start for 4.11.2. KMABT32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT_ADD
 * \brief KMABT32 (Saturating Signed Multiply Bottom & Top Words & Add)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KMABB32 Rd, Rs1, Rs2
 * KMABT32 Rd, Rs1, Rs2
 * KMATT32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit element in a register with the 32-bit element in another register
 * and add the result to the content of 64-bit data in the third register. The addition result may be
 * saturated and is written to the third register.
 * * KMABB32: rd + bottom*bottom
 * * KMABT32: rd + bottom*top
 * * KMATT32: rd + top*top
 *
 * **Description**:\n
 * For the `KMABB32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit
 * element in Rs2.
 * For the `KMABT32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit
 * element in Rs2.
 * For the `KMATT32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit
 * element in Rs2.
 * The multiplication result is added to the content of 64-bit data in Rd. If the addition result is beyond
 * the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The
 * result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed
 * integers.
 *
 * **Operations**:\n
 * ~~~
 * res = Rd + (Rs1.W[0] * Rs2.W[0]); // KMABB32
 * res = Rd + (Rs1.W[0] * Rs2.W[1]); // KMABT32
 * res = Rd + (Rs1.W[1] * Rs2.W[1]); // KMATT32
 * if (res > (2^63)-1) {
 *   res = (2^63)-1;
 *   OV = 1;
 * } else if (res < -2^63) {
 *   res = -2^63;
 *   OV = 1;
 * }
 * Rd = res;
 * ~~~
 *
 * **Exceptions**: None
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMABT32(long t, unsigned long a, unsigned long b)
{
    /* "+r"(t): t is both the accumulator input (Rd) and the destination, so the
     * instruction updates it in place: t = t + a.W[0] * b.W[1], saturated to the
     * Q63 range as described above. a and b are read-only register inputs. */
    __ASM volatile("kmabt32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for 4.11.2. KMABT32 ===== */

/* ===== Inline Function Start for 4.11.3. KMATT32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT_ADD
 * \brief KMATT32 (Saturating Signed Multiply Top Words & Add)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KMABB32 Rd, Rs1, Rs2
 * KMABT32 Rd, Rs1, Rs2
 * KMATT32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit element in a register with the 32-bit element in another register
 * and add the result to the content of 64-bit data in the third register. The addition result may be
 * saturated and is written to the third register.
 * * KMABB32: rd + bottom*bottom
 * * KMABT32: rd + bottom*top
 * * KMATT32: rd + top*top
 *
 * **Description**:\n
 * For the `KMABB32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit
 * element in Rs2.
 * For the `KMABT32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit
 * element in Rs2.
 * For the `KMATT32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit
 * element in Rs2.
 * The multiplication result is added to the content of 64-bit data in Rd. If the addition result is beyond
 * the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The
 * result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed
 * integers.
 *
 * **Operations**:\n
 * ~~~
 * res = Rd + (Rs1.W[0] * Rs2.W[0]); // KMABB32
 * res = Rd + (Rs1.W[0] * Rs2.W[1]); // KMABT32
 * res = Rd + (Rs1.W[1] * Rs2.W[1]); // KMATT32
 * if (res > (2^63)-1) {
 *   res = (2^63)-1;
 *   OV = 1;
 * } else if (res < -2^63) {
 *   res = -2^63;
 *   OV = 1;
 * }
 * Rd = res;
 * ~~~
 *
 * **Exceptions**: None
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMATT32(long t, unsigned long a, unsigned long b)
{
    /* "+r"(t): t is both the accumulator input (Rd) and the destination, so the
     * instruction updates it in place: t = t + a.W[1] * b.W[1], saturated to the
     * Q63 range as described above. a and b are read-only register inputs. */
    __ASM volatile("kmatt32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for 4.11.3. KMATT32 ===== */

/* ===== Inline Function Start for 4.12.1. KMADA32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
 * \brief KMADA32 (Saturating Signed Multiply Two Words and Two Adds)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KMADA32 Rd, Rs1, Rs2
 * KMAXDA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 32-bit multiplications from 32-bit data in two registers, and then add the
 * two 64-bit results and 64-bit data in a third register together. The addition result may be saturated.
 * * KMADA32: rd + top*top + bottom*bottom
 * * KMAXDA32: rd + top*bottom + bottom*top
 *
 * **Description**:\n
 * For the `KMADA32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-
 * bit element in Rs2 and then adds the result to the result of multiplying the top 32-bit element in Rs1
 * with the top 32-bit element in Rs2. It is actually an alias of the `KMAR64` instruction.
 * For the `KMAXDA32` instruction, it multiplies the top 32-bit element in Rs1 with the bottom 32-bit
 * element in Rs2 and then adds the result to the result of multiplying the bottom 32-bit element in Rs1
 * with the top 32-bit element in Rs2.
 * The result is added to the content of 64-bit data in Rd. If the addition result is beyond the Q63
 * number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The 64-bit
 * result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * res = Rd + (Rs1.W[1] * Rs2.W[1]) + (Rs1.W[0] * Rs2.W[0]); // KMADA32
 * res = Rd + (Rs1.W[1] * Rs2.W[0]) + (Rs1.W[0] * Rs2.W[1]); // KMAXDA32
 * if (res > (2^63)-1) {
 *   res = (2^63)-1;
 *   OV = 1;
 * } else if (res < -2^63) {
 *   res = -2^63;
 *   OV = 1;
 * }
 * Rd = res;
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMADA32(long t, unsigned long a, unsigned long b)
{
    /* "+r"(t): t is both the accumulator input (Rd) and the destination:
     * t = t + a.W[1] * b.W[1] + a.W[0] * b.W[0], saturated to the Q63 range
     * as described above. a and b are read-only register inputs. */
    __ASM volatile("kmada32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for 4.12.1. KMADA32 ===== */

/* ===== Inline Function Start for 4.12.2. KMAXDA32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
 * \brief KMAXDA32 (Saturating Signed Crossed Multiply Two Words and Two Adds)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KMADA32 Rd, Rs1, Rs2
 * KMAXDA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 32-bit multiplications from 32-bit data in two registers, and then add the
 * two 64-bit results and 64-bit data in a third register together. The addition result may be saturated.
 * * KMADA32: rd + top*top + bottom*bottom
 * * KMAXDA32: rd + top*bottom + bottom*top
 *
 * **Description**:\n
 * For the `KMADA32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-
 * bit element in Rs2 and then adds the result to the result of multiplying the top 32-bit element in Rs1
 * with the top 32-bit element in Rs2. It is actually an alias of the `KMAR64` instruction.
 * For the `KMAXDA32` instruction, it multiplies the top 32-bit element in Rs1 with the bottom 32-bit
 * element in Rs2 and then adds the result to the result of multiplying the bottom 32-bit element in Rs1
 * with the top 32-bit element in Rs2.
 * The result is added to the content of 64-bit data in Rd. If the addition result is beyond the Q63
 * number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The 64-bit
 * result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * res = Rd + (Rs1.W[1] * Rs2.W[1]) + (Rs1.W[0] * Rs2.W[0]); // KMADA32
 * res = Rd + (Rs1.W[1] * Rs2.W[0]) + (Rs1.W[0] * Rs2.W[1]); // KMAXDA32
 * if (res > (2^63)-1) {
 *   res = (2^63)-1;
 *   OV = 1;
 * } else if (res < -2^63) {
 *   res = -2^63;
 *   OV = 1;
 * }
 * Rd = res;
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMAXDA32(long t, unsigned long a, unsigned long b)
{
    /* "+r"(t): t is both the accumulator input (Rd) and the destination:
     * t = t + a.W[1] * b.W[0] + a.W[0] * b.W[1] (crossed products), saturated
     * to the Q63 range as described above. a and b are read-only register inputs. */
    __ASM volatile("kmaxda32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for 4.12.2. KMAXDA32 ===== */

/* ===== Inline Function Start for 4.13.1. KMDA32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
 * \brief KMDA32 (Signed Multiply Two Words and Add)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KMDA32 Rd, Rs1, Rs2
 * KMXDA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 32-bit multiplications from the 32-bit elements of two registers, and then
 * add the two 64-bit results together. The addition result may be saturated.
 * * KMDA32: top*top + bottom*bottom
 * * KMXDA32: top*bottom + bottom*top
 *
 * **Description**:\n
 * For the `KMDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
 * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1
 * with the top 32-bit element of Rs2.
 * For the `KMXDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
 * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1
 * with the bottom 32-bit element of Rs2.
 * The addition result is checked for saturation. If saturation happens, the result is saturated to 2^63-1.
 * The final result is written to Rd. The 32-bit contents are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * if ((Rs1 != 0x8000000080000000) or (Rs2 != 0x8000000080000000)) {
 *   Rd = (Rs1.W[1] * Rs2.W[1]) + (Rs1.W[0] * Rs2.W[0]); // KMDA32
 *   Rd = (Rs1.W[1] * Rs2.W[0]) + (Rs1.W[0] * Rs2.W[1]); // KMXDA32
 * } else {
 *   Rd = 0x7fffffffffffffff;
 *   OV = 1;
 * }
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMDA32(unsigned long a, unsigned long b)
{
    /* Destination register; write-only ("=r"), so it needs no initial value. */
    long result;
    /* result = a.W[1] * b.W[1] + a.W[0] * b.W[0]; the saturation case is
     * described in the Operations section above. */
    __ASM volatile("kmda32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
    return result;
}
/* ===== Inline Function End for 4.13.1. KMDA32 ===== */

/* ===== Inline Function Start for 4.13.2. KMXDA32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
 * \brief KMXDA32 (Signed Crossed Multiply Two Words and Add)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KMDA32 Rd, Rs1, Rs2
 * KMXDA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 32-bit multiplications from the 32-bit elements of two registers, and then
 * add the two 64-bit results together. The addition result may be saturated.
 * * KMDA32: top*top + bottom*bottom
 * * KMXDA32: top*bottom + bottom*top
 *
 * **Description**:\n
 * For the `KMDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
 * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1
 * with the top 32-bit element of Rs2.
 * For the `KMXDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
 * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1
 * with the bottom 32-bit element of Rs2.
 * The addition result is checked for saturation. If saturation happens, the result is saturated to 2^63-1.
 * The final result is written to Rd. The 32-bit contents are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * if ((Rs1 != 0x8000000080000000) or (Rs2 != 0x8000000080000000)) {
 *   Rd = (Rs1.W[1] * Rs2.W[1]) + (Rs1.W[0] * Rs2.W[0]); // KMDA32
 *   Rd = (Rs1.W[1] * Rs2.W[0]) + (Rs1.W[0] * Rs2.W[1]); // KMXDA32
 * } else {
 *   Rd = 0x7fffffffffffffff;
 *   OV = 1;
 * }
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMXDA32(unsigned long a, unsigned long b)
{
    /* Destination register; write-only ("=r"), so it needs no initial value. */
    long result;
    /* result = a.W[1] * b.W[0] + a.W[0] * b.W[1] (crossed products); the
     * saturation case is described in the Operations section above. */
    __ASM volatile("kmxda32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
    return result;
}
/* ===== Inline Function End for 4.13.2. KMXDA32 ===== */

/* ===== Inline Function Start for 4.14.1. KMADS32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
 * \brief KMADS32 (Saturating Signed Multiply Two Words & Subtract & Add)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KMADS32 Rd, Rs1, Rs2
 * KMADRS32 Rd, Rs1, Rs2
 * KMAXDS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 32-bit multiplications from 32-bit elements in two registers; and then
 * perform a subtraction operation between the two 64-bit results. Then add the subtraction result to
 * 64-bit data in a third register. The addition result may be saturated.
 * * KMADS32: rd + (top*top - bottom*bottom)
 * * KMADRS32: rd + (bottom*bottom - top*top)
 * * KMAXDS32: rd + (top*bottom - bottom*top)
 *
 * **Description**:\n
 * For the `KMADS32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit
 * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in
 * Rs1 with the top 32-bit element in Rs2.
 * For the `KMADRS32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit
 * element in Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit
 * element in Rs1 with the bottom 32-bit element in Rs2.
 * For the `KMAXDS32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit
 * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in
 * Rs1 with the bottom 32-bit element in Rs2.
 * The subtraction result is then added to the content of 64-bit data in Rd. If the addition result is
 * beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to
 * 1. The 64-bit result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated
 * as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * res = Rd + (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // KMADS32
 * res = Rd + (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // KMADRS32
 * res = Rd + (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // KMAXDS32
 * if (res > (2^63)-1) {
 *   res = (2^63)-1;
 *   OV = 1;
 * } else if (res < -2^63) {
 *   res = -2^63;
 *   OV = 1;
 * }
 * Rd = res;
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMADS32(long t, unsigned long a, unsigned long b)
{
    /* "+r"(t): t is both the accumulator input (Rd) and the destination:
     * t = t + a.W[1] * b.W[1] - a.W[0] * b.W[0], saturated to the Q63 range
     * as described above. a and b are read-only register inputs. */
    __ASM volatile("kmads32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for 4.14.1. KMADS32 ===== */

/* ===== Inline Function Start for 4.14.2. KMADRS32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
 * \brief KMADRS32 (Saturating Signed Multiply Two Words & Reverse Subtract & Add)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KMADS32 Rd, Rs1, Rs2
 * KMADRS32 Rd, Rs1, Rs2
 * KMAXDS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 32-bit multiplications from 32-bit elements in two registers; and then
 * perform a subtraction operation between the two 64-bit results. Then add the subtraction result to
 * 64-bit data in a third register. The addition result may be saturated.
 * * KMADS32: rd + (top*top - bottom*bottom)
 * * KMADRS32: rd + (bottom*bottom - top*top)
 * * KMAXDS32: rd + (top*bottom - bottom*top)
 *
 * **Description**:\n
 * For the `KMADS32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit
 * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in
 * Rs1 with the top 32-bit element in Rs2.
 * For the `KMADRS32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit
 * element in Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit
 * element in Rs1 with the bottom 32-bit element in Rs2.
 * For the `KMAXDS32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit
 * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in
 * Rs1 with the bottom 32-bit element in Rs2.
 * The subtraction result is then added to the content of 64-bit data in Rd. If the addition result is
 * beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to
 * 1. The 64-bit result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated
 * as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * res = Rd + (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // KMADS32
 * res = Rd + (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // KMADRS32
 * res = Rd + (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // KMAXDS32
 * if (res > (2^63)-1) {
 *   res = (2^63)-1;
 *   OV = 1;
 * } else if (res < -2^63) {
 *   res = -2^63;
 *   OV = 1;
 * }
 * Rd = res;
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMADRS32(long t, unsigned long a, unsigned long b)
{
    /* "+r"(t): t is both the accumulator input (Rd) and the destination:
     * t = t + a.W[0] * b.W[0] - a.W[1] * b.W[1] (reverse subtract), saturated
     * to the Q63 range as described above. a and b are read-only register inputs. */
    __ASM volatile("kmadrs32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for 4.14.2. KMADRS32 ===== */

/* ===== Inline Function Start for 4.14.3. KMAXDS32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
 * \brief KMAXDS32 (Saturating Signed Crossed Multiply Two Words & Subtract & Add)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KMADS32 Rd, Rs1, Rs2
 * KMADRS32 Rd, Rs1, Rs2
 * KMAXDS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 32-bit multiplications from 32-bit elements in two registers; and then
 * perform a subtraction operation between the two 64-bit results. Then add the subtraction result to
 * 64-bit data in a third register. The addition result may be saturated.
 * * KMADS32: rd + (top*top - bottom*bottom)
 * * KMADRS32: rd + (bottom*bottom - top*top)
 * * KMAXDS32: rd + (top*bottom - bottom*top)
 *
 * **Description**:\n
 * For the `KMADS32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit
 * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in
 * Rs1 with the top 32-bit element in Rs2.
 * For the `KMADRS32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit
 * element in Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit
 * element in Rs1 with the bottom 32-bit element in Rs2.
 * For the `KMAXDS32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit
 * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in
 * Rs1 with the bottom 32-bit element in Rs2.
 * The subtraction result is then added to the content of 64-bit data in Rd. If the addition result is
 * beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to
 * 1. The 64-bit result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated
 * as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * res = Rd + (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // KMADS32
 * res = Rd + (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // KMADRS32
 * res = Rd + (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // KMAXDS32
 * if (res > (2^63)-1) {
 *   res = (2^63)-1;
 *   OV = 1;
 * } else if (res < -2^63) {
 *   res = -2^63;
 *   OV = 1;
 * }
 * Rd = res;
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMAXDS32(long t, unsigned long a, unsigned long b)
{
    /* "+r"(t): t is both the accumulator input (Rd) and the destination:
     * t = t + a.W[1] * b.W[0] - a.W[0] * b.W[1] (crossed products), saturated
     * to the Q63 range as described above. a and b are read-only register inputs. */
    __ASM volatile("kmaxds32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for 4.14.3. KMAXDS32 ===== */

/* ===== Inline Function Start for 4.15.1. KMSDA32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
 * \brief KMSDA32 (Saturating Signed Multiply Two Words & Add & Subtract)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KMSDA32 Rd, Rs1, Rs2
 * KMSXDA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 32-bit multiplications from the 32-bit elements of two registers, and then
 * subtract the two 64-bit results from a third register. The subtraction result may be saturated.
 * * KMSDA32: rd - top*top - bottom*bottom
 * * KMSXDA32: rd - top*bottom - bottom*top
 *
 * **Description**:\n
 * For the `KMSDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
 * element of Rs2 and multiplies the top 32-bit element of Rs1 with the top 32-bit element of Rs2.
 * For the `KMSXDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
 * element of Rs2 and multiplies the top 32-bit element of Rs1 with the bottom 32-bit element of Rs2.
 * The two 64-bit multiplication results are then subtracted from the content of Rd. If the subtraction
 * result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit
 * is set to 1. The result after saturation is written to Rd. The 32-bit contents are treated as signed
 * integers.
 *
 * **Operations**:\n
 * ~~~
 * res = Rd - (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // KMSDA32
 * res = Rd - (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // KMSXDA32
 * if (res > (2^63)-1) {
 *   res = (2^63)-1;
 *   OV = 1;
 * } else if (res < -2^63) {
 *   res = -2^63;
 *   OV = 1;
 * }
 * Rd = res;
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMSDA32(long t, unsigned long a, unsigned long b)
{
    /* "+r"(t): t is both the accumulator input (Rd) and the destination:
     * t = t - a.W[1] * b.W[1] - a.W[0] * b.W[0], saturated to the Q63 range
     * as described above. a and b are read-only register inputs. */
    __ASM volatile("kmsda32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for 4.15.1. KMSDA32 ===== */

/* ===== Inline Function Start for 4.15.2. KMSXDA32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
 * \brief KMSXDA32 (Saturating Signed Crossed Multiply Two Words & Add & Subtract)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KMSDA32 Rd, Rs1, Rs2
 * KMSXDA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 32-bit multiplications from the 32-bit elements of two registers, and then
 * subtract the two 64-bit results from a third register. The subtraction result may be saturated.
 * * KMSDA32: rd - top*top - bottom*bottom
 * * KMSXDA32: rd - top*bottom - bottom*top
 *
 * **Description**:\n
 * For the `KMSDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
 * element of Rs2 and multiplies the top 32-bit element of Rs1 with the top 32-bit element of Rs2.
 * For the `KMSXDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
 * element of Rs2 and multiplies the top 32-bit element of Rs1 with the bottom 32-bit element of Rs2.
 * The two 64-bit multiplication results are then subtracted from the content of Rd. If the subtraction
 * result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit
 * is set to 1. The result after saturation is written to Rd. The 32-bit contents are treated as signed
 * integers.
 *
 * **Operations**:\n
 * ~~~
 * res = Rd - (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // KMSDA32
 * res = Rd - (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // KMSXDA32
 * if (res > (2^63)-1) {
 *   res = (2^63)-1;
 *   OV = 1;
 * } else if (res < -2^63) {
 *   res = -2^63;
 *   OV = 1;
 * }
 * Rd = res;
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMSXDA32(long t, unsigned long a, unsigned long b)
{
    /* "+r"(t): t is both the accumulator input (Rd) and the destination:
     * t = t - a.W[1] * b.W[0] - a.W[0] * b.W[1] (crossed products), saturated
     * to the Q63 range as described above. a and b are read-only register inputs. */
    __ASM volatile("kmsxda32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for 4.15.2. KMSXDA32 ===== */

/* ===== Inline Function Start for 4.16. KSLL32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
 * \brief KSLL32 (SIMD 32-bit Saturating Shift Left Logical)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KSLL32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit elements logical left shift operations with saturation simultaneously. The shift
 * amount is a variable from a GPR.
 *
 * **Description**:\n
 * The 32-bit data elements in Rs1 are left-shifted logically. The vacated low-order bits are filled
 * with zero and the shift amount is specified by the low-order 5 bits of the value in the Rs2 register.
 * Any shifted value greater than 2^31-1 is saturated to 2^31-1. Any shifted value smaller than -2^31 is
 * saturated to -2^31. And the saturated results are written to Rd. If any saturation is performed, set OV
 * bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * sa = Rs2[4:0];
 * if (sa != 0) {
 *   res[(31+sa):0] = Rs1.W[x] << sa;
 *   if (res > (2^31)-1) {
 *     res = 0x7fffffff; OV = 1;
 *   } else if (res < -2^31) {
 *     res = 0x80000000; OV = 1;
 *   }
 *   Rd.W[x] = res[31:0];
 * } else {
 *   Rd = Rs1;
 * }
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KSLL32(unsigned long a, unsigned int b)
{
    /* Destination register; write-only ("=r"), so it needs no initial value. */
    unsigned long result;
    /* a is Rs1 (the packed 32-bit elements) and b is Rs2 (the shift amount);
     * per the Operations section above only b[4:0] is used as the shift count. */
    __ASM volatile("ksll32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
    return result;
}
/* ===== Inline Function End for 4.16. KSLL32 ===== */

/* ===== Inline Function Start for 4.17. KSLLI32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
 * \brief KSLLI32 (SIMD 32-bit Saturating Shift Left Logical Immediate)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KSLLI32 Rd, Rs1, imm5u
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit elements logical left shift operations with saturation simultaneously. The shift
 * amount is an immediate value.
 *
 * **Description**:\n
 * The 32-bit data elements in Rs1 are left-shifted logically. The shifted out bits are filled
 * with zero and the shift amount is specified by the imm5u constant. Any shifted value greater than
 * 2^31-1 is saturated to 2^31-1. Any shifted value smaller than -2^31 is saturated to -2^31. And the saturated
 * results are written to Rd. If any saturation is performed, set OV bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * sa = imm5u[4:0];
 * if (sa != 0) {
 *   res[(31+sa):0] = Rs1.W[x] << sa;
 *   if (res > (2^31)-1) {
 *     res = 0x7fffffff; OV = 1;
 *   } else if (res < -2^31) {
 *     res = 0x80000000; OV = 1;
 *   }
 *   Rd.W[x] = res[31:0];
 * } else {
 *   Rd = Rs1;
 * }
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
#define __RV_KSLLI32(a, b)    \
    ({    \
        /* Temporaries carry a macro-specific prefix and `a` is evaluated   */    \
        /* first, so an argument expression that names a caller variable    */    \
        /* called `result` or `__a` is not captured by these locals.        */    \
        unsigned long __rv_kslli32_rs1 = (unsigned long)(a);    \
        unsigned long __rv_kslli32_rd;    \
        /* %0 <- destination, %1 <- a (register), %2 <- b (immediate, "K"). */    \
        __ASM volatile("kslli32 %0, %1, %2"    \
                       : "=r"(__rv_kslli32_rd)    \
                       : "r"(__rv_kslli32_rs1), "K"(b));    \
        __rv_kslli32_rd;    \
    })
/* ===== Inline Function End for 4.17. KSLLI32 ===== */

/* ===== Inline Function Start for 4.18.1. KSLRA32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
 * \brief KSLRA32 (SIMD 32-bit Shift Left Logical with Saturation or Shift Right Arithmetic)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KSLRA32 Rd, Rs1, Rs2
 * KSLRA32.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit elements logical left (positive) or arithmetic right (negative) shift operation with
 * Q31 saturation for the left shift. The `.u` form performs additional rounding up operations for the
 * right shift.
 *
 * **Description**:\n
 * The 32-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
 * based on the value of Rs2[5:0]. Rs2[5:0] is in the signed range of [-2^5, 2^5-1]. A positive Rs2[5:0] means
 * logical left shift and a negative Rs2[5:0] means arithmetic right shift. The shift amount is the
 * absolute value of Rs2[5:0]. However, the behavior of `Rs2[5:0]==-2^5 (0x20)` is defined to be
 * equivalent to the behavior of `Rs2[5:0]==-(2^5-1) (0x21)`.
 * The left-shifted results are saturated to the 32-bit signed integer range of [-2^31, 2^31-1]. For the `.u`
 * form of the instruction, the right-shifted results are added a 1 to the most significant discarded bit
 * position for rounding effect. After the shift, saturation, or rounding, the final results are written to
 * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:6] will not affect
 * this instruction.
 *
 * **Operations**:\n
 * ~~~
 * if (Rs2[5:0] < 0) {
 *   sa = -Rs2[5:0];
 *   sa = (sa == 32)? 31 : sa;
 *   if (`.u` form) {
 *     res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1;
 *     Rd.W[x] = res[31:0];
 *   } else {
 *     Rd.W[x] = SE32(Rs1.W[x][31:sa]);
 *   }
 * } else {
 *   sa = Rs2[4:0];
 *   res[(31+sa):0] = Rs1.W[x] <<(logic) sa;
 *   if (res > (2^31)-1) {
 *     res[31:0] = 0x7fffffff; OV = 1;
 *   } else if (res < -2^31) {
 *     res[31:0] = 0x80000000; OV = 1;
 *   }
 *   Rd.W[x] = res[31:0];
 * }
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KSLRA32(unsigned long a, int b)
{
    unsigned long rd;   /* receives the destination register of kslra32 */

    /* Operand binding: %0 <- rd (write-only), %1 <- a, %2 <- b (signed shift control). */
    __ASM volatile(
        "kslra32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );

    return rd;
}
/* ===== Inline Function End for 4.18.1. KSLRA32 ===== */

/* ===== Inline Function Start for 4.18.2. KSLRA32.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
 * \brief KSLRA32.u (SIMD 32-bit Shift Left Logical with Saturation or Rounding Shift Right Arithmetic)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KSLRA32 Rd, Rs1, Rs2
 * KSLRA32.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit elements logical left (positive) or arithmetic right (negative) shift operation with
 * Q31 saturation for the left shift. The `.u` form performs additional rounding up operations for the
 * right shift.
 *
 * **Description**:\n
 * The 32-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
 * based on the value of Rs2[5:0]. Rs2[5:0] is in the signed range of [-2^5, 2^5-1]. A positive Rs2[5:0] means
 * logical left shift and a negative Rs2[5:0] means arithmetic right shift. The shift amount is the
 * absolute value of Rs2[5:0]. However, the behavior of `Rs2[5:0]==-2^5 (0x20)` is defined to be
 * equivalent to the behavior of `Rs2[5:0]==-(2^5-1) (0x21)`.
 * The left-shifted results are saturated to the 32-bit signed integer range of [-2^31, 2^31-1]. For the `.u`
 * form of the instruction, the right-shifted results are added a 1 to the most significant discarded bit
 * position for rounding effect. After the shift, saturation, or rounding, the final results are written to
 * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:6] will not affect
 * this instruction.
 *
 * **Operations**:\n
 * ~~~
 * if (Rs2[5:0] < 0) {
 *   sa = -Rs2[5:0];
 *   sa = (sa == 32)? 31 : sa;
 *   if (`.u` form) {
 *     res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1;
 *     Rd.W[x] = res[31:0];
 *   } else {
 *     Rd.W[x] = SE32(Rs1.W[x][31:sa]);
 *   }
 * } else {
 *   sa = Rs2[4:0];
 *   res[(31+sa):0] = Rs1.W[x] <<(logic) sa;
 *   if (res > (2^31)-1) {
 *     res[31:0] = 0x7fffffff; OV = 1;
 *   } else if (res < -2^31) {
 *     res[31:0] = 0x80000000; OV = 1;
 *   }
 *   Rd.W[x] = res[31:0];
 * }
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KSLRA32_U(unsigned long a, int b)
{
    unsigned long rd;   /* receives the destination register of kslra32.u */

    /* Operand binding: %0 <- rd (write-only), %1 <- a, %2 <- b (signed shift control). */
    __ASM volatile(
        "kslra32.u %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );

    return rd;
}
/* ===== Inline Function End for 4.18.2. KSLRA32.u ===== */

/* ===== Inline Function Start for 4.19. KSTAS32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief KSTAS32 (SIMD 32-bit Signed Saturating Straight Addition & Subtraction)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KSTAS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer element saturating addition and 32-bit signed integer element
 * saturating subtraction in a 64-bit chunk simultaneously. Operands are from corresponding 32-bit
 * elements.
 *
 * **Description**:\n
 * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit
 * integer element in [63:32] of Rs2; at the same time, it subtracts the 32-bit integer element in [31:0] of
 * Rs2 from the 32-bit integer element in [31:0] of Rs1. If any of the results are beyond the Q31 number
 * range (-2^31 <= Q31 <= 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated
 * results are written to [63:32] of Rd for addition and [31:0] of Rd for subtraction.
 *
 * **Operations**:\n
 * ~~~
 * res[1] = Rs1.W[1] + Rs2.W[1];
 * res[0] = Rs1.W[0] - Rs2.W[0];
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[1] = res[1];
 * Rd.W[0] = res[0];
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KSTAS32(unsigned long a, unsigned long b)
{
    unsigned long rd;   /* receives the destination register of kstas32 */

    /* Operand binding: %0 <- rd (write-only), %1 <- a, %2 <- b. */
    __ASM volatile(
        "kstas32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );

    return rd;
}
/* ===== Inline Function End for 4.19. KSTAS32 ===== */

/* ===== Inline Function Start for 4.20. KSTSA32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief KSTSA32 (SIMD 32-bit Signed Saturating Straight Subtraction & Addition)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KSTSA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer element saturating subtraction and 32-bit signed integer element
 * saturating addition in a 64-bit chunk simultaneously. Operands are from corresponding 32-bit
 * elements.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit integer element in [63:32] of Rs2 from the 32-bit integer
 * element in [63:32] of Rs1; at the same time, it adds the 32-bit integer element in [31:0] of Rs1 with
 * the 32-bit integer element in [31:0] of Rs2. If any of the results are beyond the Q31 number range (
 * -2^31 <= Q31 <= 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated results are
 * written to [63:32] of Rd for subtraction and [31:0] of Rd for addition.
 *
 * **Operations**:\n
 * ~~~
 * res[1] = Rs1.W[1] - Rs2.W[1];
 * res[0] = Rs1.W[0] + Rs2.W[0];
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[1] = res[1];
 * Rd.W[0] = res[0];
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KSTSA32(unsigned long a, unsigned long b)
{
    unsigned long rd;   /* receives the destination register of kstsa32 */

    /* Operand binding: %0 <- rd (write-only), %1 <- a, %2 <- b. */
    __ASM volatile(
        "kstsa32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );

    return rd;
}
/* ===== Inline Function End for 4.20. KSTSA32 ===== */

/* ===== Inline Function Start for 4.21. KSUB32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief KSUB32 (SIMD 32-bit Signed Saturating Subtraction)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KSUB32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer elements saturating subtractions simultaneously.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit signed integer elements in Rs2 from the 32-bit
 * signed integer elements in Rs1. If any of the results are beyond the Q31 number range (-2^31 <= Q31 <=
 * 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to
 * Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rs1.W[x] - Rs2.W[x];
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KSUB32(unsigned long a, unsigned long b)
{
    unsigned long rd;   /* receives the destination register of ksub32 */

    /* Operand binding: %0 <- rd (write-only), %1 <- a, %2 <- b. */
    __ASM volatile(
        "ksub32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );

    return rd;
}
/* ===== Inline Function End for 4.21. KSUB32 ===== */

/* ===== Inline Function Start for 4.22.1. PKBB32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PACK
 * \brief PKBB32 (Pack Two 32-bit Data from Both Bottom Half)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * PKBB32 Rd, Rs1, Rs2
 * PKBT32 Rd, Rs1, Rs2
 * PKTT32 Rd, Rs1, Rs2
 * PKTB32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Pack 32-bit data from 64-bit chunks in two registers.
 * * PKBB32: bottom.bottom
 * * PKBT32: bottom.top
 * * PKTT32: top.top
 * * PKTB32: top.bottom
 *
 * **Description**:\n
 * (PKBB32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
 * (PKBT32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
 * (PKTT32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
 * (PKTB32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
 *
 * **Operations**:\n
 * ~~~
 * Rd = CONCAT(Rs1.W[_*0*_], Rs2.W[_*0*_]); // PKBB32
 * Rd = CONCAT(Rs1.W[_*0*_], Rs2.W[_*1*_]); // PKBT32
 * Rd = CONCAT(Rs1.W[_*1*_], Rs2.W[_*1*_]); // PKTT32
 * Rd = CONCAT(Rs1.W[_*1*_], Rs2.W[_*0*_]); // PKTB32
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_PKBB32(unsigned long a, unsigned long b)
{
    unsigned long rd;   /* receives the packed destination register of pkbb32 */

    /* Operand binding: %0 <- rd (write-only), %1 <- a, %2 <- b. */
    __ASM volatile(
        "pkbb32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );

    return rd;
}
/* ===== Inline Function End for 4.22.1. PKBB32 ===== */

/* ===== Inline Function Start for 4.22.2. PKBT32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PACK
 * \brief PKBT32 (Pack Two 32-bit Data from Bottom and Top Half)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * PKBB32 Rd, Rs1, Rs2
 * PKBT32 Rd, Rs1, Rs2
 * PKTT32 Rd, Rs1, Rs2
 * PKTB32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Pack 32-bit data from 64-bit chunks in two registers.
 * * PKBB32: bottom.bottom
 * * PKBT32: bottom.top
 * * PKTT32: top.top
 * * PKTB32: top.bottom
 *
 * **Description**:\n
 * (PKBB32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
 * (PKBT32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
 * (PKTT32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
 * (PKTB32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
 *
 * **Operations**:\n
 * ~~~
 * Rd = CONCAT(Rs1.W[_*0*_], Rs2.W[_*0*_]); // PKBB32
 * Rd = CONCAT(Rs1.W[_*0*_], Rs2.W[_*1*_]); // PKBT32
 * Rd = CONCAT(Rs1.W[_*1*_], Rs2.W[_*1*_]); // PKTT32
 * Rd = CONCAT(Rs1.W[_*1*_], Rs2.W[_*0*_]); // PKTB32
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_PKBT32(unsigned long a, unsigned long b)
{
    unsigned long rd;   /* receives the packed destination register of pkbt32 */

    /* Operand binding: %0 <- rd (write-only), %1 <- a, %2 <- b. */
    __ASM volatile(
        "pkbt32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );

    return rd;
}
/* ===== Inline Function End for 4.22.2. PKBT32 ===== */

/* ===== Inline Function Start for 4.22.3. PKTT32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PACK
 * \brief PKTT32 (Pack Two 32-bit Data from Both Top Half)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * PKBB32 Rd, Rs1, Rs2
 * PKBT32 Rd, Rs1, Rs2
 * PKTT32 Rd, Rs1, Rs2
 * PKTB32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Pack 32-bit data from 64-bit chunks in two registers.
 * * PKBB32: bottom.bottom
 * * PKBT32: bottom.top
 * * PKTT32: top.top
 * * PKTB32: top.bottom
 *
 * **Description**:\n
 * (PKBB32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
 * (PKBT32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
 * (PKTT32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
 * (PKTB32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
 *
 * **Operations**:\n
 * ~~~
 * Rd = CONCAT(Rs1.W[_*0*_], Rs2.W[_*0*_]); // PKBB32
 * Rd = CONCAT(Rs1.W[_*0*_], Rs2.W[_*1*_]); // PKBT32
 * Rd = CONCAT(Rs1.W[_*1*_], Rs2.W[_*1*_]); // PKTT32
 * Rd = CONCAT(Rs1.W[_*1*_], Rs2.W[_*0*_]); // PKTB32
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_PKTT32(unsigned long a, unsigned long b)
{
    unsigned long rd;   /* receives the packed destination register of pktt32 */

    /* Operand binding: %0 <- rd (write-only), %1 <- a, %2 <- b. */
    __ASM volatile(
        "pktt32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );

    return rd;
}
/* ===== Inline Function End for 4.22.3. PKTT32 ===== */

/* ===== Inline Function Start for 4.22.4. PKTB32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PACK
 * \brief PKTB32 (Pack Two 32-bit Data from Top and Bottom Half)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * PKBB32 Rd, Rs1, Rs2
 * PKBT32 Rd, Rs1, Rs2
 * PKTT32 Rd, Rs1, Rs2
 * PKTB32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Pack 32-bit data from 64-bit chunks in two registers.
 * * PKBB32: bottom.bottom
 * * PKBT32: bottom.top
 * * PKTT32: top.top
 * * PKTB32: top.bottom
 *
 * **Description**:\n
 * (PKBB32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
 * (PKBT32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
 * (PKTT32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
 * (PKTB32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
 *
 * **Operations**:\n
 * ~~~
 * Rd = CONCAT(Rs1.W[_*0*_], Rs2.W[_*0*_]); // PKBB32
 * Rd = CONCAT(Rs1.W[_*0*_], Rs2.W[_*1*_]); // PKBT32
 * Rd = CONCAT(Rs1.W[_*1*_], Rs2.W[_*1*_]); // PKTT32
 * Rd = CONCAT(Rs1.W[_*1*_], Rs2.W[_*0*_]); // PKTB32
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_PKTB32(unsigned long a, unsigned long b)
{
    unsigned long rd;   /* receives the packed destination register of pktb32 */

    /* Operand binding: %0 <- rd (write-only), %1 <- a, %2 <- b. */
    __ASM volatile(
        "pktb32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );

    return rd;
}
/* ===== Inline Function End for 4.22.4. PKTB32 ===== */

/* ===== Inline Function Start for 4.23. RADD32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief RADD32 (SIMD 32-bit Signed Halving Addition)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * RADD32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer element additions simultaneously. The results are halved to avoid
 * overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the 32-bit signed integer elements in Rs1 with the 32-bit signed
 * integer elements in Rs2. The results are first arithmetically right-shifted by 1 bit and then written to
 * Rd.
 *
 * **Examples**:\n
 * ~~~
 * * Rs1 = 0x7FFFFFFF, Rs2 = 0x7FFFFFFF Rd = 0x7FFFFFFF
 * * Rs1 = 0x80000000, Rs2 = 0x80000000 Rd = 0x80000000
 * * Rs1 = 0x40000000, Rs2 = 0x80000000 Rd = 0xE0000000
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = (Rs1.W[x] + Rs2.W[x]) s>> 1;
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_RADD32(unsigned long a, unsigned long b)
{
    unsigned long rd;   /* receives the destination register of radd32 */

    /* Operand binding: %0 <- rd (write-only), %1 <- a, %2 <- b. */
    __ASM volatile(
        "radd32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );

    return rd;
}
/* ===== Inline Function End for 4.23. RADD32 ===== */

/* ===== Inline Function Start for 4.24. RCRAS32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief RCRAS32 (SIMD 32-bit Signed Halving Cross Addition & Subtraction)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * RCRAS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer element addition and 32-bit signed integer element subtraction in
 * a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements. The results are halved to
 * avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the 32-bit signed integer element in [63:32] of Rs1 with the 32-bit
 * signed integer element in [31:0] of Rs2, and subtracts the 32-bit signed integer element in [63:32] of
 * Rs2 from the 32-bit signed integer element in [31:0] of Rs1. The element results are first
 * arithmetically right-shifted by 1 bit and then written to [63:32] of Rd for addition and [31:0] of Rd
 * for subtraction.
 *
 * **Examples**:\n
 * ~~~
 * Please see `RADD32` and `RSUB32` instructions.
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[1] = (Rs1.W[1] + Rs2.W[0]) s>> 1;
 * Rd.W[0] = (Rs1.W[0] - Rs2.W[1]) s>> 1;
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_RCRAS32(unsigned long a, unsigned long b)
{
    unsigned long rd;   /* receives the destination register of rcras32 */

    /* Operand binding: %0 <- rd (write-only), %1 <- a, %2 <- b. */
    __ASM volatile(
        "rcras32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );

    return rd;
}
/* ===== Inline Function End for 4.24. RCRAS32 ===== */

/* ===== Inline Function Start for 4.25. RCRSA32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief RCRSA32 (SIMD 32-bit Signed Halving Cross Subtraction & Addition)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * RCRSA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer element subtraction and 32-bit signed integer element addition in
 * a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements. The results are halved to
 * avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit signed integer element in [31:0] of Rs2 from the
 * 32-bit signed integer element in [63:32] of Rs1, and adds the 32-bit signed element integer in [31:0]
 * of Rs1 with the 32-bit signed integer element in [63:32] of Rs2. The two results are first
 * arithmetically right-shifted by 1 bit and then written to [63:32] of Rd for subtraction and [31:0] of
 * Rd for addition.
 *
 * **Examples**:\n
 * ~~~
 * Please see `RADD32` and `RSUB32` instructions.
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[1] = (Rs1.W[1] - Rs2.W[0]) s>> 1;
 * Rd.W[0] = (Rs1.W[0] + Rs2.W[1]) s>> 1;
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_RCRSA32(unsigned long a, unsigned long b)
{
    unsigned long rd;   /* receives the destination register of rcrsa32 */

    /* Operand binding: %0 <- rd (write-only), %1 <- a, %2 <- b. */
    __ASM volatile(
        "rcrsa32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );

    return rd;
}
/* ===== Inline Function End for 4.25. RCRSA32 ===== */

/* ===== Inline Function Start for 4.26. RSTAS32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief RSTAS32 (SIMD 32-bit Signed Halving Straight Addition & Subtraction)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * RSTAS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer element addition and 32-bit signed integer element subtraction in
 * a 64-bit chunk simultaneously. Operands are from corresponding 32-bit elements. The results are
 * halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the 32-bit signed integer element in [63:32] of Rs1 with the 32-bit
 * signed integer element in [63:32] of Rs2, and subtracts the 32-bit signed integer element in [31:0] of
 * Rs2 from the 32-bit signed integer element in [31:0] of Rs1. The element results are first
 * arithmetically right-shifted by 1 bit and then written to [63:32] of Rd for addition and [31:0] of Rd
 * for subtraction.
 *
 * **Examples**:\n
 * ~~~
 * Please see `RADD32` and `RSUB32` instructions.
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[1] = (Rs1.W[1] + Rs2.W[1]) s>> 1;
 * Rd.W[0] = (Rs1.W[0] - Rs2.W[0]) s>> 1;
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_RSTAS32(unsigned long a, unsigned long b)
{
    unsigned long rd;   /* receives the destination register of rstas32 */

    /* Operand binding: %0 <- rd (write-only), %1 <- a, %2 <- b. */
    __ASM volatile(
        "rstas32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );

    return rd;
}
/* ===== Inline Function End for 4.26. RSTAS32 ===== */

/* ===== Inline Function Start for 4.27. RSTSA32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief RSTSA32 (SIMD 32-bit Signed Halving Straight Subtraction & Addition)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * RSTSA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer element subtraction and 32-bit signed integer element addition in
 * a 64-bit chunk simultaneously. Operands are from corresponding 32-bit elements. The results are
 * halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit signed integer element in [63:32] of Rs2 from the
 * 32-bit signed integer element in [63:32] of Rs1, and adds the 32-bit signed element integer in [31:0]
 * of Rs1 with the 32-bit signed integer element in [31:0] of Rs2. The two results are first arithmetically
 * right-shifted by 1 bit and then written to [63:32] of Rd for subtraction and [31:0] of Rd for addition.
 *
 * **Examples**:\n
 * ~~~
 * Please see `RADD32` and `RSUB32` instructions.
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[1] = (Rs1.W[1] - Rs2.W[1]) s>> 1;
 * Rd.W[0] = (Rs1.W[0] + Rs2.W[0]) s>> 1;
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_RSTSA32(unsigned long a, unsigned long b)
{
    unsigned long rd;   /* receives the destination register of rstsa32 */

    /* Operand binding: %0 <- rd (write-only), %1 <- a, %2 <- b. */
    __ASM volatile(
        "rstsa32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );

    return rd;
}
/* ===== Inline Function End for 4.27. RSTSA32 ===== */

/* ===== Inline Function Start for 4.28. RSUB32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief RSUB32 (SIMD 32-bit Signed Halving Subtraction)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * RSUB32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer element subtractions simultaneously. The results are halved to
 * avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit signed integer elements in Rs2 from the 32-bit
 * signed integer elements in Rs1. The results are first arithmetically right-shifted by 1 bit and then
 * written to Rd.
 *
 * **Examples**:\n
 * ~~~
 * * Rs1 = 0x7FFFFFFF, Rs2 = 0x80000000, Rd = 0x7FFFFFFF
 * * Rs1 = 0x80000000, Rs2 = 0x7FFFFFFF, Rd = 0x80000000
 * * Rs1 = 0x80000000, Rs2 = 0x40000000, Rd = 0xA0000000
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = (Rs1.W[x] - Rs2.W[x]) s>> 1;
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_RSUB32(unsigned long a, unsigned long b)
{
    unsigned long rd;   /* receives the destination register of rsub32 */

    /* Operand binding: %0 <- rd (write-only), %1 <- a, %2 <- b. */
    __ASM volatile(
        "rsub32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );

    return rd;
}
/* ===== Inline Function End for 4.28. RSUB32 ===== */

/* ===== Inline Function Start for 4.29. SLL32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
 * \brief SLL32 (SIMD 32-bit Shift Left Logical)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * SLL32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit elements logical left shift operations simultaneously. The shift amount is a
 * variable from a GPR.
 *
 * **Description**:\n
 * The 32-bit elements in Rs1 are left-shifted logically. And the results are written to Rd.
 * The shifted out bits are filled with zero and the shift amount is specified by the low-order 5-bits of
 * the value in the Rs2 register.
 *
 * **Operations**:\n
 * ~~~
 * sa = Rs2[4:0];
 * Rd.W[x] = Rs1.W[x] << sa;
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SLL32(unsigned long a, unsigned int b)
{
    unsigned long rd;   /* receives the destination register of sll32 */

    /* Operand binding: %0 <- rd (write-only), %1 <- a, %2 <- b (shift amount). */
    __ASM volatile(
        "sll32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );

    return rd;
}
/* ===== Inline Function End for 4.29. SLL32 ===== */

/* ===== Inline Function Start for 4.30. SLLI32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
 * \brief SLLI32 (SIMD 32-bit Shift Left Logical Immediate)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * SLLI32 Rd, Rs1, imm5u[4:0]
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit element logical left shift operations simultaneously. The shift amount is an
 * immediate value.
 *
 * **Description**:\n
 * The 32-bit elements in Rs1 are left-shifted logically. The shifted out bits are filled with
 * zero and the shift amount is specified by the imm5u[4:0] constant. And the results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = imm5u[4:0];
 * Rd.W[x] = Rs1.W[x] << sa;
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    shift amount imm5u; must be a compile-time constant (it is passed as an immediate operand)
 * \return value stored in unsigned long type
 */
#define __RV_SLLI32(a, b)    \
    ({    \
        /* `a` is evaluated before any other local is declared, and the */    \
        /* locals use a reserved `__rv_` prefix, so an argument expression */    \
        /* that mentions a caller variable such as `result` can no longer */    \
        /* bind to one of this macro's own (uninitialized) variables. */    \
        unsigned long __rv_rs1 = (unsigned long)(a);    \
        unsigned long __rv_rd;    \
        /* %0 = Rd, %1 = Rs1, %2 = shift amount. `b` is passed with the "K" */    \
        /* constraint (a 5-bit unsigned immediate in GCC's RISC-V port), so */    \
        /* it must be a compile-time constant. */    \
        __ASM volatile("slli32 %0, %1, %2" : "=r"(__rv_rd) : "r"(__rv_rs1), "K"(b));    \
        __rv_rd;    \
    })
/* ===== Inline Function End for 4.30. SLLI32 ===== */

/* ===== Inline Function Start for 4.31. SMAX32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC
 * \brief SMAX32 (SIMD 32-bit Signed Maximum)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * SMAX32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer elements finding maximum operations simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 32-bit signed integer elements in Rs1 with the 32-bit
 * signed integer elements in Rs2 and selects the number that is greater than the other one. The
 * selected results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = (Rs1.W[x] > Rs2.W[x])? Rs1.W[x] : Rs2.W[x];
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SMAX32(unsigned long a, unsigned long b)
{
    unsigned long maxima; /* bound to Rd; written only by the asm statement */

    /* Operand map: %0 = Rd (maxima), %1 = Rs1 (a), %2 = Rs2 (b). */
    __ASM volatile(
        "smax32 %0, %1, %2"
        : "=r"(maxima)
        : "r"(a), "r"(b)
    );

    return maxima;
}
/* ===== Inline Function End for 4.31. SMAX32 ===== */

/* ===== Inline Function Start for 4.32.1. SMBB32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT
 * \brief SMBB32 (Signed Multiply Bottom Word & Bottom Word)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * SMBB32 Rd, Rs1, Rs2
 * SMBT32 Rd, Rs1, Rs2
 * SMTT32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit element of a register with the signed 32-bit element of another
 * register and write the 64-bit result to a third register.
 * * SMBB32: bottom*bottom
 * * SMBT32: bottom*top
 * * SMTT32: top*top
 *
 * **Description**:\n
 * For the `SMBB32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
 * element of Rs2. It is actually an alias of `MULSR64` instruction.
 * For the `SMBT32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
 * element of Rs2.
 * For the `SMTT32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit element
 * of Rs2.
 * The 64-bit multiplication result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as
 * signed integers.
 *
 * **Operations**:\n
 * ~~~
 * res = Rs1.W[0] * Rs2.W[0]; // SMBB32
 * res = Rs1.W[0] * Rs2.W[1]; // SMBT32
 * res = Rs1.W[1] * Rs2.W[1]; // SMTT32
 * Rd = res;
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_SMBB32(unsigned long a, unsigned long b)
{
    long product; /* bound to Rd; written only by the asm statement */

    /* Operand map: %0 = Rd (product), %1 = Rs1 (a), %2 = Rs2 (b). */
    __ASM volatile(
        "smbb32 %0, %1, %2"
        : "=r"(product)
        : "r"(a), "r"(b)
    );

    return product;
}
/* ===== Inline Function End for 4.32.1. SMBB32 ===== */

/* ===== Inline Function Start for 4.32.2. SMBT32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT
 * \brief SMBT32 (Signed Multiply Bottom Word & Top Word)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * SMBB32 Rd, Rs1, Rs2
 * SMBT32 Rd, Rs1, Rs2
 * SMTT32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit element of a register with the signed 32-bit element of another
 * register and write the 64-bit result to a third register.
 * * SMBB32: bottom*bottom
 * * SMBT32: bottom*top
 * * SMTT32: top*top
 *
 * **Description**:\n
 * For the `SMBB32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
 * element of Rs2. It is actually an alias of `MULSR64` instruction.
 * For the `SMBT32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
 * element of Rs2.
 * For the `SMTT32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit element
 * of Rs2.
 * The 64-bit multiplication result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as
 * signed integers.
 *
 * **Operations**:\n
 * ~~~
 * res = Rs1.W[0] * Rs2.W[0]; // SMBB32
 * res = Rs1.W[0] * Rs2.W[1]; // SMBT32
 * res = Rs1.W[1] * Rs2.W[1]; // SMTT32
 * Rd = res;
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_SMBT32(unsigned long a, unsigned long b)
{
    long product; /* bound to Rd; written only by the asm statement */

    /* Operand map: %0 = Rd (product), %1 = Rs1 (a), %2 = Rs2 (b). */
    __ASM volatile(
        "smbt32 %0, %1, %2"
        : "=r"(product)
        : "r"(a), "r"(b)
    );

    return product;
}
/* ===== Inline Function End for 4.32.2. SMBT32 ===== */

/* ===== Inline Function Start for 4.32.3. SMTT32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT
 * \brief SMTT32 (Signed Multiply Top Word & Top Word)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * SMBB32 Rd, Rs1, Rs2
 * SMBT32 Rd, Rs1, Rs2
 * SMTT32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit element of a register with the signed 32-bit element of another
 * register and write the 64-bit result to a third register.
 * * SMBB32: bottom*bottom
 * * SMBT32: bottom*top
 * * SMTT32: top*top
 *
 * **Description**:\n
 * For the `SMBB32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
 * element of Rs2. It is actually an alias of `MULSR64` instruction.
 * For the `SMBT32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
 * element of Rs2.
 * For the `SMTT32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit element
 * of Rs2.
 * The 64-bit multiplication result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as
 * signed integers.
 *
 * **Operations**:\n
 * ~~~
 * res = Rs1.W[0] * Rs2.W[0]; // SMBB32
 * res = Rs1.W[0] * Rs2.W[1]; // SMBT32
 * res = Rs1.W[1] * Rs2.W[1]; // SMTT32
 * Rd = res;
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_SMTT32(unsigned long a, unsigned long b)
{
    long product; /* bound to Rd; written only by the asm statement */

    /* Operand map: %0 = Rd (product), %1 = Rs1 (a), %2 = Rs2 (b). */
    __ASM volatile(
        "smtt32 %0, %1, %2"
        : "=r"(product)
        : "r"(a), "r"(b)
    );

    return product;
}
/* ===== Inline Function End for 4.32.3. SMTT32 ===== */

/* ===== Inline Function Start for 4.33.1. SMDS32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
 * \brief SMDS32 (Signed Multiply Two Words and Subtract)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * SMDS32 Rd, Rs1, Rs2
 * SMDRS32 Rd, Rs1, Rs2
 * SMXDS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 32-bit multiplications from the 32-bit elements of two registers; and then
 * perform a subtraction operation between the two 64-bit results.
 * * SMDS32: top*top - bottom*bottom
 * * SMDRS32: bottom*bottom - top*top
 * * SMXDS32: top*bottom - bottom*top
 *
 * **Description**:\n
 * For the `SMDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
 * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
 * Rs1 with the top 32-bit element of Rs2.
 * For the `SMDRS32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit
 * element of Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit
 * element of Rs1 with the bottom 32-bit element of Rs2.
 * For the `SMXDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
 * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
 * Rs1 with the bottom 32-bit element of Rs2.
 * The subtraction result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed
 * integers.
 *
 * **Operations**:\n
 * ~~~
 * Rd = (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // SMDS32
 * Rd = (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // SMDRS32
 * Rd = (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // SMXDS32
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_SMDS32(unsigned long a, unsigned long b)
{
    long difference; /* bound to Rd; written only by the asm statement */

    /* Operand map: %0 = Rd (difference), %1 = Rs1 (a), %2 = Rs2 (b). */
    __ASM volatile(
        "smds32 %0, %1, %2"
        : "=r"(difference)
        : "r"(a), "r"(b)
    );

    return difference;
}
/* ===== Inline Function End for 4.33.1. SMDS32 ===== */

/* ===== Inline Function Start for 4.33.2. SMDRS32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
 * \brief SMDRS32 (Signed Multiply Two Words and Reverse Subtract)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * SMDS32 Rd, Rs1, Rs2
 * SMDRS32 Rd, Rs1, Rs2
 * SMXDS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 32-bit multiplications from the 32-bit elements of two registers; and then
 * perform a subtraction operation between the two 64-bit results.
 * * SMDS32: top*top - bottom*bottom
 * * SMDRS32: bottom*bottom - top*top
 * * SMXDS32: top*bottom - bottom*top
 *
 * **Description**:\n
 * For the `SMDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
 * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
 * Rs1 with the top 32-bit element of Rs2.
 * For the `SMDRS32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit
 * element of Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit
 * element of Rs1 with the bottom 32-bit element of Rs2.
 * For the `SMXDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
 * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
 * Rs1 with the bottom 32-bit element of Rs2.
 * The subtraction result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed
 * integers.
 *
 * **Operations**:\n
 * ~~~
 * Rd = (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // SMDS32
 * Rd = (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // SMDRS32
 * Rd = (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // SMXDS32
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_SMDRS32(unsigned long a, unsigned long b)
{
    long difference; /* bound to Rd; written only by the asm statement */

    /* Operand map: %0 = Rd (difference), %1 = Rs1 (a), %2 = Rs2 (b). */
    __ASM volatile(
        "smdrs32 %0, %1, %2"
        : "=r"(difference)
        : "r"(a), "r"(b)
    );

    return difference;
}
/* ===== Inline Function End for 4.33.2. SMDRS32 ===== */

/* ===== Inline Function Start for 4.33.3. SMXDS32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
 * \brief SMXDS32 (Signed Crossed Multiply Two Words and Subtract)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * SMDS32 Rd, Rs1, Rs2
 * SMDRS32 Rd, Rs1, Rs2
 * SMXDS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 32-bit multiplications from the 32-bit elements of two registers; and then
 * perform a subtraction operation between the two 64-bit results.
 * * SMDS32: top*top - bottom*bottom
 * * SMDRS32: bottom*bottom - top*top
 * * SMXDS32: top*bottom - bottom*top
 *
 * **Description**:\n
 * For the `SMDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
 * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
 * Rs1 with the top 32-bit element of Rs2.
 * For the `SMDRS32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit
 * element of Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit
 * element of Rs1 with the bottom 32-bit element of Rs2.
 * For the `SMXDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
 * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
 * Rs1 with the bottom 32-bit element of Rs2.
 * The subtraction result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed
 * integers.
 *
 * **Operations**:\n
 * ~~~
 * Rd = (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // SMDS32
 * Rd = (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // SMDRS32
 * Rd = (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // SMXDS32
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_SMXDS32(unsigned long a, unsigned long b)
{
    long difference; /* bound to Rd; written only by the asm statement */

    /* Operand map: %0 = Rd (difference), %1 = Rs1 (a), %2 = Rs2 (b). */
    __ASM volatile(
        "smxds32 %0, %1, %2"
        : "=r"(difference)
        : "r"(a), "r"(b)
    );

    return difference;
}
/* ===== Inline Function End for 4.33.3. SMXDS32 ===== */

/* ===== Inline Function Start for 4.34. SMIN32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC
 * \brief SMIN32 (SIMD 32-bit Signed Minimum)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * SMIN32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer elements finding minimum operations simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 32-bit signed integer elements in Rs1 with the 32-bit
 * signed integer elements in Rs2 and selects the number that is less than the other one. The selected
 * results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = (Rs1.W[x] < Rs2.W[x])? Rs1.W[x] : Rs2.W[x];
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SMIN32(unsigned long a, unsigned long b)
{
    unsigned long minima; /* bound to Rd; written only by the asm statement */

    /* Operand map: %0 = Rd (minima), %1 = Rs1 (a), %2 = Rs2 (b). */
    __ASM volatile(
        "smin32 %0, %1, %2"
        : "=r"(minima)
        : "r"(a), "r"(b)
    );

    return minima;
}
/* ===== Inline Function End for 4.34. SMIN32 ===== */

/* ===== Inline Function Start for 4.35.1. SRA32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
 * \brief SRA32 (SIMD 32-bit Shift Right Arithmetic)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * SRA32 Rd, Rs1, Rs2
 * SRA32.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit element arithmetic right shift operations simultaneously. The shift amount is a
 * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
 * results.
 *
 * **Description**:\n
 * The 32-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
 * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order
 * 5-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is
 * added to the most significant discarded bit of each 32-bit data element to calculate the final results.
 * And the results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = Rs2[4:0];
 * if (sa > 0) {
 *   if (`.u` form) { // SRA32.u
 *     res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1;
 *     Rd.W[x] = res[31:0];
 *   } else { // SRA32
 *     Rd.W[x] = SE32(Rs1.W[x][31:sa]);
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SRA32(unsigned long a, unsigned int b)
{
    unsigned long shifted; /* bound to Rd; written only by the asm statement */

    /* Operand map: %0 = Rd (shifted), %1 = Rs1 (a), %2 = Rs2 (b, shift amount). */
    __ASM volatile(
        "sra32 %0, %1, %2"
        : "=r"(shifted)
        : "r"(a), "r"(b)
    );

    return shifted;
}
/* ===== Inline Function End for 4.35.1. SRA32 ===== */

/* ===== Inline Function Start for 4.35.2. SRA32.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
 * \brief SRA32.u (SIMD 32-bit Rounding Shift Right Arithmetic)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * SRA32 Rd, Rs1, Rs2
 * SRA32.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit element arithmetic right shift operations simultaneously. The shift amount is a
 * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
 * results.
 *
 * **Description**:\n
 * The 32-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
 * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order
 * 5-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is
 * added to the most significant discarded bit of each 32-bit data element to calculate the final results.
 * And the results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = Rs2[4:0];
 * if (sa > 0) {
 *   if (`.u` form) { // SRA32.u
 *     res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1;
 *     Rd.W[x] = res[31:0];
 *   } else { // SRA32
 *     Rd.W[x] = SE32(Rs1.W[x][31:sa]);
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SRA32_U(unsigned long a, unsigned int b)
{
    unsigned long rounded; /* bound to Rd; written only by the asm statement */

    /* Operand map: %0 = Rd (rounded), %1 = Rs1 (a), %2 = Rs2 (b, shift amount). */
    __ASM volatile(
        "sra32.u %0, %1, %2"
        : "=r"(rounded)
        : "r"(a), "r"(b)
    );

    return rounded;
}
/* ===== Inline Function End for 4.35.2. SRA32.u ===== */

/* ===== Inline Function Start for 4.36.1. SRAI32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
 * \brief SRAI32 (SIMD 32-bit Shift Right Arithmetic Immediate)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * SRAI32 Rd, Rs1, imm5u
 * SRAI32.u Rd, Rs1, imm5u
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit elements arithmetic right shift operations simultaneously. The shift amount is
 * an immediate value. The `.u` form performs additional rounding up operations on the shifted
 * results.
 *
 * **Description**:\n
 * The 32-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
 * bits are filled with the sign-bit of the 32-bit data elements. The shift amount is specified by the
 * imm5u constant. For the rounding operation of the `.u` form, a value of 1 is added to the most
 * significant discarded bit of each 32-bit data to calculate the final results. And the results are written
 * to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = imm5u[4:0];
 * if (sa > 0) {
 *   if (`.u` form) { // SRAI32.u
 *     res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1;
 *     Rd.W[x] = res[31:0];
 *   } else { // SRAI32
 *     Rd.W[x] = SE32(Rs1.W[x][31:sa]);
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
#define __RV_SRAI32(a, b)    \
    ({    \
        /* `a` is evaluated before any other local is declared, and the */    \
        /* locals use a reserved `__rv_` prefix, so an argument expression */    \
        /* that mentions a caller variable such as `result` can no longer */    \
        /* bind to one of this macro's own (uninitialized) variables. */    \
        unsigned long __rv_rs1 = (unsigned long)(a);    \
        unsigned long __rv_rd;    \
        /* %0 = Rd, %1 = Rs1, %2 = shift amount. `b` is passed with the "K" */    \
        /* constraint (a 5-bit unsigned immediate in GCC's RISC-V port), so */    \
        /* it must be a compile-time constant. */    \
        __ASM volatile("srai32 %0, %1, %2" : "=r"(__rv_rd) : "r"(__rv_rs1), "K"(b));    \
        __rv_rd;    \
    })
/* ===== Inline Function End for 4.36.1. SRAI32 ===== */

/* ===== Inline Function Start for 4.36.2. SRAI32.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
 * \brief SRAI32.u (SIMD 32-bit Rounding Shift Right Arithmetic Immediate)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * SRAI32 Rd, Rs1, imm5u
 * SRAI32.u Rd, Rs1, imm5u
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit elements arithmetic right shift operations simultaneously. The shift amount is
 * an immediate value. The `.u` form performs additional rounding up operations on the shifted
 * results.
 *
 * **Description**:\n
 * The 32-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
 * bits are filled with the sign-bit of the 32-bit data elements. The shift amount is specified by the
 * imm5u constant. For the rounding operation of the `.u` form, a value of 1 is added to the most
 * significant discarded bit of each 32-bit data to calculate the final results. And the results are written
 * to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = imm5u[4:0];
 * if (sa > 0) {
 *   if (`.u` form) { // SRAI32.u
 *     res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1;
 *     Rd.W[x] = res[31:0];
 *   } else { // SRAI32
 *     Rd.W[x] = SE32(Rs1.W[x][31:sa]);
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
#define __RV_SRAI32_U(a, b)    \
    ({    \
        /* `a` is evaluated before any other local is declared, and the */    \
        /* locals use a reserved `__rv_` prefix, so an argument expression */    \
        /* that mentions a caller variable such as `result` can no longer */    \
        /* bind to one of this macro's own (uninitialized) variables. */    \
        unsigned long __rv_rs1 = (unsigned long)(a);    \
        unsigned long __rv_rd;    \
        /* %0 = Rd, %1 = Rs1, %2 = shift amount. `b` is passed with the "K" */    \
        /* constraint (a 5-bit unsigned immediate in GCC's RISC-V port), so */    \
        /* it must be a compile-time constant. */    \
        __ASM volatile("srai32.u %0, %1, %2" : "=r"(__rv_rd) : "r"(__rv_rs1), "K"(b));    \
        __rv_rd;    \
    })
/* ===== Inline Function End for 4.36.2. SRAI32.u ===== */

/* ===== Inline Function Start for 4.37. SRAIW.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_NON_SIMD_32B_SHIFT
 * \brief SRAIW.u (Rounding Shift Right Arithmetic Immediate Word)
 * \details
 * **Type**: DSP (RV64 only)
 *
 * **Syntax**:\n
 * ~~~
 * SRAIW.u Rd, Rs1, imm5u
 * ~~~
 *
 * **Purpose**:\n
 * Perform a 32-bit arithmetic right shift operation with rounding. The shift amount is an
 * immediate value.
 *
 * **Description**:\n
 * This instruction right-shifts the lower 32-bit content of Rs1 arithmetically. The shifted
 * out bits are filled with the sign-bit Rs1(31) and the shift amount is specified by the imm5u constant.
 * For the rounding operation, a value of 1 is added to the most significant discarded bit of the data to
 * calculate the final result. And the result is sign-extended and written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = imm5u;
 * if (sa != 0) {
 *   res[31:-1] = SE33(Rs1[31:(sa-1)]) + 1;
 *   Rd = SE32(res[31:0]);
 * } else {
 *   Rd = SE32(Rs1.W[0]);
 * }
 * ~~~
 *
 * \param [in]  a    int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in long type
 */
#define __RV_SRAIW_U(a, b)    \
    ({    \
        /* `a` is evaluated before any other local is declared, and the */    \
        /* locals use a reserved `__rv_` prefix, so an argument expression */    \
        /* that mentions a caller variable such as `result` can no longer */    \
        /* bind to one of this macro's own (uninitialized) variables. */    \
        /* The argument is converted to int before being handed to the */    \
        /* instruction, exactly as before. */    \
        int __rv_rs1 = (int)(a);    \
        long __rv_rd;    \
        /* %0 = Rd, %1 = Rs1, %2 = shift amount. `b` is passed with the "K" */    \
        /* constraint (a 5-bit unsigned immediate in GCC's RISC-V port), so */    \
        /* it must be a compile-time constant. */    \
        __ASM volatile("sraiw.u %0, %1, %2" : "=r"(__rv_rd) : "r"(__rv_rs1), "K"(b));    \
        __rv_rd;    \
    })
/* ===== Inline Function End for 4.37. SRAIW.u ===== */

/* ===== Inline Function Start for 4.38.1. SRL32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
 * \brief SRL32 (SIMD 32-bit Shift Right Logical)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * SRL32 Rd, Rs1, Rs2
 * SRL32.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit element logical right shift operations simultaneously. The shift amount is a
 * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
 * results.
 *
 * **Description**:\n
 * The 32-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
 * are filled with zero. The shift amount is specified by the low-order 5-bits of the value in the Rs2
 * register. For the rounding operation of the `.u` form, a value of 1 is added to the most significant
 * discarded bit of each 32-bit data element to calculate the final results. And the results are written to
 * Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = Rs2[4:0];
 * if (sa > 0) {
 *   if (`.u` form) { // SRL32.u
 *     res[31:-1] = ZE33(Rs1.W[x][31:sa-1]) + 1;
 *     Rd.W[x] = res[31:0];
 *   } else { // SRL32
 *     Rd.W[x] = ZE32(Rs1.W[x][31:sa]);
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SRL32(unsigned long a, unsigned int b)
{
    unsigned long shifted; /* bound to Rd; written only by the asm statement */

    /* Operand map: %0 = Rd (shifted), %1 = Rs1 (a), %2 = Rs2 (b, shift amount). */
    __ASM volatile(
        "srl32 %0, %1, %2"
        : "=r"(shifted)
        : "r"(a), "r"(b)
    );

    return shifted;
}
/* ===== Inline Function End for 4.38.1. SRL32 ===== */

/* ===== Inline Function Start for 4.38.2. SRL32.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
 * \brief SRL32.u (SIMD 32-bit Rounding Shift Right Logical)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * SRL32 Rd, Rs1, Rs2
 * SRL32.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit element logical right shift operations simultaneously. The shift amount is a
 * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
 * results.
 *
 * **Description**:\n
 * The 32-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
 * are filled with zero. The shift amount is specified by the low-order 5-bits of the value in the Rs2
 * register. For the rounding operation of the `.u` form, a value of 1 is added to the most significant
 * discarded bit of each 32-bit data element to calculate the final results. And the results are written to
 * Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = Rs2[4:0];
 * if (sa > 0) {
 *   if (`.u` form) { // SRL32.u
 *     res[31:-1] = ZE33(Rs1.W[x][31:sa-1]) + 1;
 *     Rd.W[x] = res[31:0];
 *   } else { // SRL32
 *     Rd.W[x] = ZE32(Rs1.W[x][31:sa]);
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SRL32_U(unsigned long a, unsigned int b)
{
    unsigned long rounded; /* bound to Rd; written only by the asm statement */

    /* Operand map: %0 = Rd (rounded), %1 = Rs1 (a), %2 = Rs2 (b, shift amount). */
    __ASM volatile(
        "srl32.u %0, %1, %2"
        : "=r"(rounded)
        : "r"(a), "r"(b)
    );

    return rounded;
}
/* ===== Inline Function End for 4.38.2. SRL32.u ===== */

/* ===== Inline Function Start for 4.39.1. SRLI32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
 * \brief SRLI32 (SIMD 32-bit Shift Right Logical Immediate)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * SRLI32 Rd, Rs1, imm5u
 * SRLI32.u Rd, Rs1, imm5u
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit elements logical right shift operations simultaneously. The shift amount is an
 * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
 *
 * **Description**:\n
 * The 32-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
 * are filled with zero. The shift amount is specified by the imm5u constant. For the rounding
 * operation of the `.u` form, a value of 1 is added to the most significant discarded bit of each 32-bit
 * data to calculate the final results. And the results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = imm5u[4:0];
 * if (sa > 0) {
 *   if (`.u` form) { // SRLI32.u
 *     res[31:-1] = ZE33(Rs1.W[x][31:sa-1]) + 1;
 *     Rd.W[x] = res[31:0];
 *   } else { // SRLI32
 *     Rd.W[x] = ZE32(Rs1.W[x][31:sa]);
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
#define __RV_SRLI32(a, b)    \
    ({    \
        /* `a` is evaluated before any other local is declared, and the */    \
        /* locals use a reserved `__rv_` prefix, so an argument expression */    \
        /* that mentions a caller variable such as `result` can no longer */    \
        /* bind to one of this macro's own (uninitialized) variables. */    \
        unsigned long __rv_rs1 = (unsigned long)(a);    \
        unsigned long __rv_rd;    \
        /* %0 = Rd, %1 = Rs1, %2 = shift amount. `b` is passed with the "K" */    \
        /* constraint (a 5-bit unsigned immediate in GCC's RISC-V port), so */    \
        /* it must be a compile-time constant. */    \
        __ASM volatile("srli32 %0, %1, %2" : "=r"(__rv_rd) : "r"(__rv_rs1), "K"(b));    \
        __rv_rd;    \
    })
/* ===== Inline Function End for 4.39.1. SRLI32 ===== */

/* ===== Inline Function Start for 4.39.2. SRLI32.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
 * \brief SRLI32.u (SIMD 32-bit Rounding Shift Right Logical Immediate)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * SRLI32 Rd, Rs1, imm5u
 * SRLI32.u Rd, Rs1, imm5u
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit elements logical right shift operations simultaneously. The shift amount is an
 * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
 *
 * **Description**:\n
 * The 32-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
 * are filled with zero. The shift amount is specified by the imm5u constant. For the rounding
 * operation of the `.u` form, a value of 1 is added to the most significant discarded bit of each 32-bit
 * data to calculate the final results. And the results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = imm5u[4:0];
 * if (sa > 0) {
 *   if (`.u` form) { // SRLI32.u
 *     res[31:-1] = ZE33(Rs1.W[x][31:sa-1]) + 1;
 *     Rd.W[x] = res[31:0];
 *   } else { // SRLI32
 *     Rd.W[x] = ZE32(Rs1.W[x][31:sa]);
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
#define __RV_SRLI32_U(a, b)    \
    ({    \
        /* Evaluate (a) before the output temporary is declared, and keep   */    \
        /* macro-local names reserved-style, so a caller expression such as */    \
        /* `result` cannot bind to an uninitialized local of this macro.    */    \
        unsigned long __a = (unsigned long)(a);    \
        unsigned long __rd;    \
        /* (b) is the imm5u shift amount; the "K" constraint requires a     */    \
        /* compile-time constant immediate.                                 */    \
        __ASM volatile("srli32.u %0, %1, %2" : "=r"(__rd) : "r"(__a), "K"(b));    \
        __rd;    \
    })
/* ===== Inline Function End for 4.39.2. SRLI32.u ===== */

/* ===== Inline Function Start for 4.40. STAS32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief STAS32 (SIMD 32-bit Straight Addition & Subtraction)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * STAS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit integer element addition and 32-bit integer element subtraction in a 64-bit
 * chunk simultaneously. Operands are from corresponding 32-bit elements.
 *
 * **Description**:\n
 * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit
 * integer element in [63:32] of Rs2, and writes the result to [63:32] of Rd; at the same time, it subtracts
 * the 32-bit integer element in [31:0] of Rs2 from the 32-bit integer element in [31:0] of Rs1, and
 * writes the result to [31:0] of Rd.
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned operations.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[1] = Rs1.W[1] + Rs2.W[1];
 * Rd.W[0] = Rs1.W[0] - Rs2.W[0];
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_STAS32(unsigned long a, unsigned long b)
{
    unsigned long rd; /* destination register value */

    /* W[1] = a.W[1] + b.W[1]; W[0] = a.W[0] - b.W[0] */
    __ASM volatile(
        "stas32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );
    return rd;
}
/* ===== Inline Function End for 4.40. STAS32 ===== */

/* ===== Inline Function Start for 4.41. STSA32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief STSA32 (SIMD 32-bit Straight Subtraction & Addition)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * STSA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit integer element subtraction and 32-bit integer element addition in a 64-bit
 * chunk simultaneously. Operands are from corresponding 32-bit elements.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit integer element in [63:32] of Rs2 from the 32-bit integer
 * element in [63:32] of Rs1, and writes the result to [63:32] of Rd; at the same time, it adds the 32-bit
 * integer element in [31:0] of Rs1 with the 32-bit integer element in [31:0] of Rs2, and writes the result
 * to [31:0] of Rd.
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned operations.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[1] = Rs1.W[1] - Rs2.W[1];
 * Rd.W[0] = Rs1.W[0] + Rs2.W[0];
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_STSA32(unsigned long a, unsigned long b)
{
    unsigned long rd; /* destination register value */

    /* W[1] = a.W[1] - b.W[1]; W[0] = a.W[0] + b.W[0] */
    __ASM volatile(
        "stsa32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );
    return rd;
}
/* ===== Inline Function End for 4.41. STSA32 ===== */

/* ===== Inline Function Start for 4.42. SUB32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief SUB32 (SIMD 32-bit Subtraction)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * SUB32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit integer element subtractions simultaneously.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit integer elements in Rs2 from the 32-bit integer
 * elements in Rs1, and then writes the results to Rd.
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned subtraction.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = Rs1.W[x] - Rs2.W[x];
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SUB32(unsigned long a, unsigned long b)
{
    unsigned long rd; /* destination register value */

    /* Element-wise 32-bit subtraction: W[x] = a.W[x] - b.W[x] */
    __ASM volatile(
        "sub32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );
    return rd;
}
/* ===== Inline Function End for 4.42. SUB32 ===== */

/* ===== Inline Function Start for 4.43. UKADD32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief UKADD32 (SIMD 32-bit Unsigned Saturating Addition)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * UKADD32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit unsigned integer element saturating additions simultaneously.
 *
 * **Description**:\n
 * This instruction adds the 32-bit unsigned integer elements in Rs1 with the 32-bit
 * unsigned integer elements in Rs2. If any of the results are beyond the 32-bit unsigned number
 * range (0 <= RES <= 2^32-1), they are saturated to the range and the OV bit is set to 1. The saturated
 * results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rs1.W[x] + Rs2.W[x];
 * if (res[x] > (2^32)-1) {
 *   res[x] = (2^32)-1;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UKADD32(unsigned long a, unsigned long b)
{
    unsigned long rd; /* destination register value */

    /* Element-wise unsigned saturating add: W[x] = sat_u32(a.W[x] + b.W[x]) */
    __ASM volatile(
        "ukadd32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );
    return rd;
}
/* ===== Inline Function End for 4.43. UKADD32 ===== */

/* ===== Inline Function Start for 4.44. UKCRAS32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief UKCRAS32 (SIMD 32-bit Unsigned Saturating Cross Addition & Subtraction)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * UKCRAS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do one 32-bit unsigned integer element saturating addition and one 32-bit unsigned
 * integer element saturating subtraction in a 64-bit chunk simultaneously. Operands are from crossed
 * 32-bit elements.
 *
 * **Description**:\n
 * This instruction adds the 32-bit unsigned integer element in [63:32] of Rs1 with the 32-
 * bit unsigned integer element in [31:0] of Rs2; at the same time, it subtracts the 32-bit unsigned
 * integer element in [63:32] of Rs2 from the 32-bit unsigned integer element in [31:0] Rs1. If any of the
 * results are beyond the 32-bit unsigned number range (0 <= RES <= 2^32-1), they are saturated to the
 * range and the OV bit is set to 1. The saturated results are written to [63:32] of Rd for addition and
 * [31:0] of Rd for subtraction.
 *
 * **Operations**:\n
 * ~~~
 * res1 = Rs1.W[1] + Rs2.W[0];
 * res2 = Rs1.W[0] - Rs2.W[1];
 * if (res1 > (2^32)-1) {
 *   res1 = (2^32)-1;
 *   OV = 1;
 * }
 * if (res2 < 0) {
 *   res2 = 0;
 *   OV = 1;
 * }
 * Rd.W[1] = res1;
 * Rd.W[0] = res2;
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UKCRAS32(unsigned long a, unsigned long b)
{
    unsigned long rd; /* destination register value */

    /* Crossed, unsigned saturating: W[1] = a.W[1] + b.W[0]; W[0] = a.W[0] - b.W[1] */
    __ASM volatile(
        "ukcras32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );
    return rd;
}
/* ===== Inline Function End for 4.44. UKCRAS32 ===== */

/* ===== Inline Function Start for 4.45. UKCRSA32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief UKCRSA32 (SIMD 32-bit Unsigned Saturating Cross Subtraction & Addition)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * UKCRSA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do one 32-bit unsigned integer element saturating subtraction and one 32-bit unsigned
 * integer element saturating addition in a 64-bit chunk simultaneously. Operands are from crossed
 * 32-bit elements.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit unsigned integer element in [31:0] of Rs2 from the
 * 32-bit unsigned integer element in [63:32] of Rs1; at the same time, it adds the 32-bit unsigned
 * integer element in [63:32] of Rs2 with the 32-bit unsigned integer element in [31:0] Rs1. If any of the
 * results are beyond the 32-bit unsigned number range (0 <= RES <= 2^32-1), they are saturated to the
 * range and the OV bit is set to 1. The saturated results are written to [63:32] of Rd for subtraction and
 * [31:0] of Rd for addition.
 *
 * **Operations**:\n
 * ~~~
 * res1 = Rs1.W[1] - Rs2.W[0];
 * res2 = Rs1.W[0] + Rs2.W[1];
 * if (res1 < 0) {
 *   res1 = 0;
 *   OV = 1;
 * }
 * if (res2 > (2^32)-1) {
 *   res2 = (2^32)-1;
 *   OV = 1;
 * }
 * Rd.W[1] = res1;
 * Rd.W[0] = res2;
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UKCRSA32(unsigned long a, unsigned long b)
{
    unsigned long rd; /* destination register value */

    /* Crossed, unsigned saturating: W[1] = a.W[1] - b.W[0]; W[0] = a.W[0] + b.W[1] */
    __ASM volatile(
        "ukcrsa32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );
    return rd;
}
/* ===== Inline Function End for 4.45. UKCRSA32 ===== */

/* ===== Inline Function Start for 4.46. UKSTAS32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief UKSTAS32 (SIMD 32-bit Unsigned Saturating Straight Addition & Subtraction)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * UKSTAS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do one 32-bit unsigned integer element saturating addition and one 32-bit unsigned
 * integer element saturating subtraction in a 64-bit chunk simultaneously. Operands are from
 * corresponding 32-bit elements.
 *
 * **Description**:\n
 * This instruction adds the 32-bit unsigned integer element in [63:32] of Rs1 with the 32-
 * bit unsigned integer element in [63:32] of Rs2; at the same time, it subtracts the 32-bit unsigned
 * integer element in [31:0] of Rs2 from the 32-bit unsigned integer element in [31:0] Rs1. If any of the
 * results are beyond the 32-bit unsigned number range (0 <= RES <= 2^32-1), they are saturated to the
 * range and the OV bit is set to 1. The saturated results are written to [63:32] of Rd for addition and
 * [31:0] of Rd for subtraction.
 *
 * **Operations**:\n
 * ~~~
 * res1 = Rs1.W[1] + Rs2.W[1];
 * res2 = Rs1.W[0] - Rs2.W[0];
 * if (res1 > (2^32)-1) {
 *   res1 = (2^32)-1;
 *   OV = 1;
 * }
 * if (res2 < 0) {
 *   res2 = 0;
 *   OV = 1;
 * }
 * Rd.W[1] = res1;
 * Rd.W[0] = res2;
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UKSTAS32(unsigned long a, unsigned long b)
{
    unsigned long rd; /* destination register value */

    /* Straight, unsigned saturating: W[1] = a.W[1] + b.W[1]; W[0] = a.W[0] - b.W[0] */
    __ASM volatile(
        "ukstas32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );
    return rd;
}
/* ===== Inline Function End for 4.46. UKSTAS32 ===== */

/* ===== Inline Function Start for 4.47. UKSTSA32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief UKSTSA32 (SIMD 32-bit Unsigned Saturating Straight Subtraction & Addition)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * UKSTSA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do one 32-bit unsigned integer element saturating subtraction and one 32-bit unsigned
 * integer element saturating addition in a 64-bit chunk simultaneously. Operands are from
 * corresponding 32-bit elements.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit unsigned integer element in [63:32] of Rs2 from
 * the 32-bit unsigned integer element in [63:32] of Rs1; at the same time, it adds the 32-bit unsigned
 * integer element in [31:0] of Rs2 with the 32-bit unsigned integer element in [31:0] Rs1. If any of the
 * results are beyond the 32-bit unsigned number range (0 <= RES <= 2^32-1), they are saturated to the
 * range and the OV bit is set to 1. The saturated results are written to [63:32] of Rd for subtraction and
 * [31:0] of Rd for addition.
 *
 * **Operations**:\n
 * ~~~
 * res1 = Rs1.W[1] - Rs2.W[1];
 * res2 = Rs1.W[0] + Rs2.W[0];
 * if (res1 < 0) {
 *   res1 = 0;
 *   OV = 1;
 * }
 * if (res2 > (2^32)-1) {
 *   res2 = (2^32)-1;
 *   OV = 1;
 * }
 * Rd.W[1] = res1;
 * Rd.W[0] = res2;
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UKSTSA32(unsigned long a, unsigned long b)
{
    unsigned long rd; /* destination register value */

    /* Straight, unsigned saturating: W[1] = a.W[1] - b.W[1]; W[0] = a.W[0] + b.W[0] */
    __ASM volatile(
        "ukstsa32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );
    return rd;
}
/* ===== Inline Function End for 4.47. UKSTSA32 ===== */

/* ===== Inline Function Start for 4.48. UKSUB32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief UKSUB32 (SIMD 32-bit Unsigned Saturating Subtraction)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * UKSUB32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit unsigned integer elements saturating subtractions simultaneously.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit unsigned integer elements in Rs2 from the 32-bit
 * unsigned integer elements in Rs1. If any of the results are beyond the 32-bit unsigned number
 * range (0 <= RES <= 2^32-1), they are saturated to the range and the OV bit is set to 1. The saturated
 * results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rs1.W[x] - Rs2.W[x];
 * if (res[x] < 0) {
 *   res[x] = 0;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UKSUB32(unsigned long a, unsigned long b)
{
    unsigned long rd; /* destination register value */

    /* Element-wise unsigned saturating subtract: W[x] = sat_u32(a.W[x] - b.W[x]) */
    __ASM volatile(
        "uksub32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );
    return rd;
}
/* ===== Inline Function End for 4.48. UKSUB32 ===== */

/* ===== Inline Function Start for 4.49. UMAX32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC
 * \brief UMAX32 (SIMD 32-bit Unsigned Maximum)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * UMAX32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit unsigned integer elements finding maximum operations simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 32-bit unsigned integer elements in Rs1 with the 32-bit
 * unsigned integer elements in Rs2 and, for each pair, selects the number that is greater than the
 * other one. The selected results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = (Rs1.W[x] u> Rs2.W[x])? Rs1.W[x] : Rs2.W[x];
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UMAX32(unsigned long a, unsigned long b)
{
    unsigned long rd; /* destination register value */

    /* Element-wise unsigned maximum of the 32-bit words of a and b */
    __ASM volatile(
        "umax32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );
    return rd;
}
/* ===== Inline Function End for 4.49. UMAX32 ===== */

/* ===== Inline Function Start for 4.50. UMIN32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC
 * \brief UMIN32 (SIMD 32-bit Unsigned Minimum)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * UMIN32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit unsigned integer elements finding minimum operations simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 32-bit unsigned integer elements in Rs1 with the 32-bit
 * unsigned integer elements in Rs2 and, for each pair, selects the number that is less than the
 * other one. The selected results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = (Rs1.W[x] <u Rs2.W[x])? Rs1.W[x] : Rs2.W[x];
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UMIN32(unsigned long a, unsigned long b)
{
    unsigned long rd; /* destination register value */

    /* Element-wise unsigned minimum of the 32-bit words of a and b */
    __ASM volatile(
        "umin32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );
    return rd;
}
/* ===== Inline Function End for 4.50. UMIN32 ===== */

/* ===== Inline Function Start for 4.51. URADD32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief URADD32 (SIMD 32-bit Unsigned Halving Addition)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * URADD32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit unsigned integer element additions simultaneously. The results are halved to
 * avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the 32-bit unsigned integer elements in Rs1 with the 32-bit
 * unsigned integer elements in Rs2. The results are first logically right-shifted by 1 bit and then
 * written to Rd.
 *
 * **Examples**:\n
 * ~~~
 * * Ra = 0x7FFFFFFF, Rb = 0x7FFFFFFF, Rt = 0x7FFFFFFF
 * * Ra = 0x80000000, Rb = 0x80000000, Rt = 0x80000000
 * * Ra = 0x40000000, Rb = 0x80000000, Rt = 0x60000000
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = (Rs1.W[x] + Rs2.W[x]) u>> 1;
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_URADD32(unsigned long a, unsigned long b)
{
    unsigned long rd; /* destination register value */

    /* Element-wise unsigned halving add: W[x] = (a.W[x] + b.W[x]) u>> 1 */
    __ASM volatile(
        "uradd32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );
    return rd;
}
/* ===== Inline Function End for 4.51. URADD32 ===== */

/* ===== Inline Function Start for 4.52. URCRAS32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief URCRAS32 (SIMD 32-bit Unsigned Halving Cross Addition & Subtraction)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * URCRAS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit unsigned integer element addition and 32-bit unsigned integer element
 * subtraction in a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements. The
 * results are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the 32-bit unsigned integer element in [63:32] of Rs1 with the 32-
 * bit unsigned integer element in [31:0] of Rs2, and subtracts the 32-bit unsigned integer element in
 * [63:32] of Rs2 from the 32-bit unsigned integer element in [31:0] of Rs1. The element results are first
 * logically right-shifted by 1 bit and then written to [63:32] of Rd for addition and [31:0] of Rd for
 * subtraction.
 *
 * **Examples**:\n
 * ~~~
 * Please see `URADD32` and `URSUB32` instructions.
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[1] = (Rs1.W[1] + Rs2.W[0]) u>> 1;
 * Rd.W[0] = (Rs1.W[0] - Rs2.W[1]) u>> 1;
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_URCRAS32(unsigned long a, unsigned long b)
{
    unsigned long rd; /* destination register value */

    /* Crossed, halving: W[1] = (a.W[1] + b.W[0]) u>> 1; W[0] = (a.W[0] - b.W[1]) u>> 1 */
    __ASM volatile(
        "urcras32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );
    return rd;
}
/* ===== Inline Function End for 4.52. URCRAS32 ===== */

/* ===== Inline Function Start for 4.53. URCRSA32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief URCRSA32 (SIMD 32-bit Unsigned Halving Cross Subtraction & Addition)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * URCRSA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit unsigned integer element subtraction and 32-bit unsigned integer element
 * addition in a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements. The results
 * are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit unsigned integer element in [31:0] of Rs2 from the
 * 32-bit unsigned integer element in [63:32] of Rs1, and adds the 32-bit unsigned integer element in
 * [31:0] of Rs1 with the 32-bit unsigned integer element in [63:32] of Rs2. The two results are first
 * logically right-shifted by 1 bit and then written to [63:32] of Rd for subtraction and [31:0] of Rd for
 * addition.
 *
 * **Examples**:\n
 * ~~~
 * Please see `URADD32` and `URSUB32` instructions.
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[1] = (Rs1.W[1] - Rs2.W[0]) u>> 1;
 * Rd.W[0] = (Rs1.W[0] + Rs2.W[1]) u>> 1;
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_URCRSA32(unsigned long a, unsigned long b)
{
    unsigned long rd; /* destination register value */

    /* Crossed, halving: W[1] = (a.W[1] - b.W[0]) u>> 1; W[0] = (a.W[0] + b.W[1]) u>> 1 */
    __ASM volatile(
        "urcrsa32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );
    return rd;
}
/* ===== Inline Function End for 4.53. URCRSA32 ===== */

/* ===== Inline Function Start for 4.54. URSTAS32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief URSTAS32 (SIMD 32-bit Unsigned Halving Straight Addition & Subtraction)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * URSTAS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit unsigned integer element addition and 32-bit unsigned integer element
 * subtraction in a 64-bit chunk simultaneously. Operands are from corresponding 32-bit elements.
 * The results are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the 32-bit unsigned integer element in [63:32] of Rs1 with the 32-
 * bit unsigned integer element in [63:32] of Rs2, and subtracts the 32-bit unsigned integer element in
 * [31:0] of Rs2 from the 32-bit unsigned integer element in [31:0] of Rs1. The element results are first
 * logically right-shifted by 1 bit and then written to [63:32] of Rd for addition and [31:0] of Rd for
 * subtraction.
 *
 * **Examples**:\n
 * ~~~
 * Please see `URADD32` and `URSUB32` instructions.
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[1] = (Rs1.W[1] + Rs2.W[1]) u>> 1;
 * Rd.W[0] = (Rs1.W[0] - Rs2.W[0]) u>> 1;
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_URSTAS32(unsigned long a, unsigned long b)
{
    unsigned long rd; /* destination register value */

    /* Straight, halving: W[1] = (a.W[1] + b.W[1]) u>> 1; W[0] = (a.W[0] - b.W[0]) u>> 1 */
    __ASM volatile(
        "urstas32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );
    return rd;
}
/* ===== Inline Function End for 4.54. URSTAS32 ===== */

/* ===== Inline Function Start for 4.55. URSTSA32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief URSTSA32 (SIMD 32-bit Unsigned Halving Straight Subtraction & Addition)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * URSTSA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit unsigned integer element subtraction and 32-bit unsigned integer element
 * addition in a 64-bit chunk simultaneously. Operands are from corresponding 32-bit elements. The
 * results are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit unsigned integer element in [63:32] of Rs2 from
 * the 32-bit unsigned integer element in [63:32] of Rs1, and adds the 32-bit unsigned integer element
 * in [31:0] of Rs1 with the 32-bit unsigned integer element in [31:0] of Rs2. The two results are first
 * logically right-shifted by 1 bit and then written to [63:32] of Rd for subtraction and [31:0] of Rd for
 * addition.
 *
 * **Examples**:\n
 * ~~~
 * Please see `URADD32` and `URSUB32` instructions.
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[1] = (Rs1.W[1] - Rs2.W[1]) u>> 1;
 * Rd.W[0] = (Rs1.W[0] + Rs2.W[0]) u>> 1;
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_URSTSA32(unsigned long a, unsigned long b)
{
    unsigned long rd; /* destination register value */

    /* Straight, halving: W[1] = (a.W[1] - b.W[1]) u>> 1; W[0] = (a.W[0] + b.W[0]) u>> 1 */
    __ASM volatile(
        "urstsa32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );
    return rd;
}
/* ===== Inline Function End for 4.55. URSTSA32 ===== */

/* ===== Inline Function Start for 4.56. URSUB32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief URSUB32 (SIMD 32-bit Unsigned Halving Subtraction)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * URSUB32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit unsigned integer element subtractions simultaneously. The results are halved to
 * avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit unsigned integer elements in Rs2 from the 32-bit
 * unsigned integer elements in Rs1. The results are first logically right-shifted by 1 bit and then
 * written to Rd.
 *
 * **Examples**:\n
 * ~~~
 * * Ra = 0x7FFFFFFF, Rb = 0x80000000, Rt = 0xFFFFFFFF
 * * Ra = 0x80000000, Rb = 0x7FFFFFFF, Rt = 0x00000000
 * * Ra = 0x80000000, Rb = 0x40000000, Rt = 0x20000000
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = (Rs1.W[x] - Rs2.W[x]) u>> 1;
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_URSUB32(unsigned long a, unsigned long b)
{
    unsigned long rd; /* destination register value */

    /* Element-wise unsigned halving subtract: W[x] = (a.W[x] - b.W[x]) u>> 1 */
    __ASM volatile(
        "ursub32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );
    return rd;
}
/* ===== Inline Function End for 4.56. URSUB32 ===== */

#endif /* __RISCV_XLEN == 64 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_NUCLEI_Default      Nuclei Default SIMD DSP Additional Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic
 * \brief    (RV32 & RV64)Nuclei Customized DSP Instructions
 * \details  This is Nuclei customized DSP instructions for both RV32 and RV64
 */

/* ===== Inline Function Start for EXPD80 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_Default
 * \brief EXPD80 (Expand and Copy Byte 0 to 32bit(when rv32) or 64bit(when rv64))
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * EXPD80 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * When rv32, Copy 8-bit data from 32-bit chunks into 4 bytes in a register.
 * When rv64, Copy 8-bit data from 64-bit chunks into 8 bytes in a register.
 *
 * **Description**:\n
 * Moves Rs1.B[0][7:0] to Rd.B[0][7:0], Rd.B[1][7:0], Rd.B[2][7:0], Rd.B[3][7:0]
 * (and, when rv64, also to Rd.B[4][7:0] through Rd.B[7][7:0]).
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:0] = CONCAT(Rs1.B[0][7:0], Rs1.B[0][7:0], Rs1.B[0][7:0], Rs1.B[0][7:0]);
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_EXPD80(unsigned long a)
{
    unsigned long rd; /* destination register value */

    /* Replicate byte 0 of a across the bytes of the result */
    __ASM volatile(
        "expd80 %0, %1"
        : "=r"(rd)
        : "r"(a)
    );
    return rd;
}
/* ===== Inline Function End for EXPD80 ===== */

/* ===== Inline Function Start for EXPD81 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_Default
 * \brief EXPD81 (Expand and Copy Byte 1 to 32bit(rv32) or 64bit(when rv64))
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * EXPD81 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
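 * When rv32, Copy 8-bit data from 32-bit chunks into 4 bytes in a register.
 * When rv64, Copy 8-bit data from 64-bit chunks into 8 bytes in a register.
 *
 * **Description**:\n
 * Moves Rs1.B[1][7:0] to Rd.B[0][7:0], Rd.B[1][7:0], Rd.B[2][7:0], Rd.B[3][7:0]
 * (and, when rv64, also to Rd.B[4][7:0] through Rd.B[7][7:0]).
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:0] = CONCAT(Rs1.B[1][7:0], Rs1.B[1][7:0], Rs1.B[1][7:0], Rs1.B[1][7:0]);
 * for RV32: x=0
 * for RV64: x=1...0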
 * ~~~
 *
 * \param [in]  a unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_EXPD81(unsigned long a)
{
    unsigned long rd; /* destination register value */

    /* Replicate byte 1 of a across the bytes of the result */
    __ASM volatile(
        "expd81 %0, %1"
        : "=r"(rd)
        : "r"(a)
    );
    return rd;
}
/* ===== Inline Function End for EXPD81 ===== */

/* ===== Inline Function Start for EXPD82 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_Default
 * \brief EXPD82 (Expand and Copy Byte 2 to 32bit(rv32) or 64bit(when rv64))
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * EXPD82 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
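 * When rv32, Copy 8-bit data from 32-bit chunks into 4 bytes in a register.
 * When rv64, Copy 8-bit data from 64-bit chunks into 8 bytes in a register.
 *
 * **Description**:\n
 * Moves Rs1.B[2][7:0] to Rd.B[0][7:0], Rd.B[1][7:0], Rd.B[2][7:0], Rd.B[3][7:0]
 * (and, when rv64, also to Rd.B[4][7:0] through Rd.B[7][7:0]).
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:0] = CONCAT(Rs1.B[2][7:0], Rs1.B[2][7:0], Rs1.B[2][7:0], Rs1.B[2][7:0]);
 * for RV32: x=0
 * for RV64: x=1...0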
 * ~~~
 *
 * \param [in]  a unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_EXPD82(unsigned long a)
{
    unsigned long rd; /* destination register value */

    /* Replicate byte 2 of a across the bytes of the result */
    __ASM volatile(
        "expd82 %0, %1"
        : "=r"(rd)
        : "r"(a)
    );
    return rd;
}
/* ===== Inline Function End for EXPD82 ===== */

/* ===== Inline Function Start for EXPD83 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_Default
 * \brief EXPD83 (Expand and Copy Byte 3 to 32bit(rv32) or 64bit(when rv64))
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * EXPD83 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
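 * When rv32, Copy 8-bit data from 32-bit chunks into 4 bytes in a register.
 * When rv64, Copy 8-bit data from 64-bit chunks into 8 bytes in a register.
 *
 * **Description**:\n
 * Moves Rs1.B[3][7:0] to Rd.B[0][7:0], Rd.B[1][7:0], Rd.B[2][7:0], Rd.B[3][7:0]
 * (and, when rv64, also to Rd.B[4][7:0] through Rd.B[7][7:0]).
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:0] = CONCAT(Rs1.B[3][7:0], Rs1.B[3][7:0], Rs1.B[3][7:0], Rs1.B[3][7:0]);
 * for RV32: x=0
 * for RV64: x=1...0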
 * ~~~
 *
 * \param [in]  a unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_EXPD83(unsigned long a)
{
    unsigned long rd; /* destination register value */

    /* Replicate byte 3 of a across the bytes of the result */
    __ASM volatile(
        "expd83 %0, %1"
        : "=r"(rd)
        : "r"(a)
    );
    return rd;
}
/* ===== Inline Function End for EXPD83 ===== */

#if (__RISCV_XLEN == 64)
/* ===== Inline Function Start for EXPD84 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_Default
 * \brief EXPD84 (Expand and Copy Byte 4 to 64bit)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * EXPD84 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Only RV64, copy 8-bit data from 64-bit chunks into 8 bytes in a register.
 *
 * **Description**:\n
 * Moves Rs1.B[4][7:0] to Rd.B[0][7:0] through Rd.B[7][7:0] (all 8 bytes of Rd).
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:0] = CONCAT(Rs1.B[4][7:0], Rs1.B[4][7:0], Rs1.B[4][7:0], Rs1.B[4][7:0]);
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_EXPD84(unsigned long a)
{
    unsigned long rd; /* destination register value */

    /* Replicate byte 4 of a across the bytes of the result (RV64 only) */
    __ASM volatile(
        "expd84 %0, %1"
        : "=r"(rd)
        : "r"(a)
    );
    return rd;
}
/* ===== Inline Function End for EXPD84 ===== */

/* ===== Inline Function Start for EXPD85 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_Default
 * \brief EXPD85 (Expand and Copy Byte 5 to 64bit)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * EXPD85 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Only RV64, copy 8-bit data from 64-bit chunks into 8 bytes in a register.
 *
 * **Description**:\n
 * Moves Rs1.B[5][7:0] to Rd.B[0][7:0] through Rd.B[7][7:0] (all 8 bytes of Rd).
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:0] = CONCAT(Rs1.B[5][7:0], Rs1.B[5][7:0], Rs1.B[5][7:0], Rs1.B[5][7:0]);
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_EXPD85(unsigned long a)
{
    unsigned long rd; /* destination register value */

    /* Replicate byte 5 of a across the bytes of the result (RV64 only) */
    __ASM volatile(
        "expd85 %0, %1"
        : "=r"(rd)
        : "r"(a)
    );
    return rd;
}
/* ===== Inline Function End for EXPD85 ===== */

/* ===== Inline Function Start for EXPD86 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_Default
 * \brief EXPD86 (Expand and Copy Byte 6 to 64bit)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * EXPD86 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Only RV64, copy 8-bit data from 64-bit chunks into 8 bytes in a register.
 *
 * **Description**:\n
 * Moves Rs1.B[6][7:0] to Rd.B[0][7:0] through Rd.B[7][7:0] (all 8 bytes of Rd).
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:0] = CONCAT(Rs1.B[6][7:0], Rs1.B[6][7:0], Rs1.B[6][7:0], Rs1.B[6][7:0]);
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_EXPD86(unsigned long a)
{
    unsigned long rd; /* destination register value */

    /* Replicate byte 6 of a across the bytes of the result (RV64 only) */
    __ASM volatile(
        "expd86 %0, %1"
        : "=r"(rd)
        : "r"(a)
    );
    return rd;
}
/* ===== Inline Function End for EXPD86 ===== */

/* ===== Inline Function Start for EXPD87 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_Default
 * \brief EXPD87 (Expand and Copy Byte 7 to 64bit)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * EXPD87 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Only RV64, copy 8-bit data from 64-bit chunks into 8 bytes in a register.
 *
 * **Description**:\n
 * Moves Rs1.B[7][7:0] to Rd.B[0][7:0] through Rd.B[7][7:0] (all 8 bytes of Rd).
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:0] = CONCAT(Rs1.B[7][7:0], Rs1.B[7][7:0], Rs1.B[7][7:0], Rs1.B[7][7:0]);
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_EXPD87(unsigned long a)
{
    unsigned long rd; /* destination register value */

    /* Replicate byte 7 of a across the bytes of the result (RV64 only) */
    __ASM volatile(
        "expd87 %0, %1"
        : "=r"(rd)
        : "r"(a)
    );
    return rd;
}
/* ===== Inline Function End for EXPD87 ===== */
#endif /* __RISCV_XLEN == 64 */

#if (__RISCV_XLEN == 32) || defined(__ONLY_FOR_DOXYGEN_DOCUMENT_GENERATION__)
/* XXXXX Nuclei Extended DSP Instructions for RV32 XXXXX */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N1      Nuclei N1 SIMD DSP Additional Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic
 * \brief    (RV32 only) Nuclei Customized N1 DSP Instructions
 * \details  These are Nuclei customized DSP N1 instructions, available only on RV32
 */
/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2      Nuclei N2 SIMD DSP Additional Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic
 * \brief    (RV32 only) Nuclei Customized N2 DSP Instructions
 * \details  These are Nuclei customized DSP N2 instructions, available only on RV32
 */
/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3      Nuclei N3 SIMD DSP Additional Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic
 * \brief    (RV32 only) Nuclei Customized N3 DSP Instructions
 * \details  These are Nuclei customized DSP N3 instructions, available only on RV32
 */

/* ===== Inline Function Start for DKHM8 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N1
 * \brief DKHM8 (64-bit SIMD Signed Saturating Q7 Multiply)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKHM8 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do Q7xQ7 element multiplications simultaneously. The Q14 results are then reduced to Q7
 * numbers again.
 *
 * **Description**:\n
 * For the `DKHM8` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1
 * with the top 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7
 * content of 16-bit chunks in Rs1 with the bottom 8-bit Q7 content of 16-bit chunks in Rs2.
 *
 * The Q14 results are then right-shifted 7-bits and saturated into Q7 values. The Q7 results are then
 * written into Rd. When both the two Q7 inputs of a multiplication are 0x80, saturation will happen.
 * The result will be saturated to 0x7F and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.B[x+1]; op2t = Rs2.B[x+1]; // top
 * op1b = Rs1.B[x]; op2b = Rs2.B[x]; // bottom
 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
 *   if (0x80 != aop | 0x80 != bop) {
 *     res = (aop s* bop) >> 7;
 *   } else {
 *     res= 0x7F;
 *     OV = 1;
 *   }
 * }
 * Rd.H[x/2] = concat(rest, resb);
 * for RV32, x=0,2,4,6
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKHM8(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;  /* written by the instruction (asm operand %0) */

    /* Issue a single `dkhm8`; `a` and `b` are register inputs %1 and %2. */
    __ASM volatile(
        "dkhm8 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );

    return rd;
}
/* ===== Inline Function End for DKHM8 ===== */

/* ===== Inline Function Start for DKHM16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N1
 * \brief DKHM16 (64-bit SIMD Signed Saturating Q15 Multiply)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKHM16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do Q15xQ15 element multiplications simultaneously. The Q30 results are then reduced to
 * Q15 numbers again.
 *
 * **Description**:\n
 * For the `DKHM16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in
 * Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom
 * 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content of 32-bit chunks in
 * Rs2.
 *
 * The Q30 results are then right-shifted 15-bits and saturated into Q15 values. The Q15 results are
 * then written into Rd. When both the two Q15 inputs of a multiplication are 0x8000, saturation will
 * happen. The result will be saturated to 0x7FFF and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.H[x+1]; op2t = Rs2.H[x+1]; // top
 * op1b = Rs1.H[x]; op2b = Rs2.H[x]; // bottom
 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
 *   if (0x8000 != aop | 0x8000 != bop) {
 *     res = (aop s* bop) >> 15;
 *   } else {
 *     res= 0x7FFF;
 *     OV = 1;
 *   }
 * }
 * Rd.W[x/2] = concat(rest, resb);
 * for RV32: x=0, 2
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKHM16(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;  /* written by the instruction (asm operand %0) */

    /* Issue a single `dkhm16`; `a` and `b` are register inputs %1 and %2. */
    __ASM volatile(
        "dkhm16 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );

    return rd;
}
/* ===== Inline Function End for DKHM16 ===== */

/* ===== Inline Function Start for DKABS8 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N1
 * \brief DKABS8 (64-bit SIMD 8-bit Saturating Absolute)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKABS8 Rd, Rs1
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Get the absolute value of 8-bit signed integer elements simultaneously.
 *
 * **Description**:\n
 * This instruction calculates the absolute value of 8-bit signed integer elements stored
 * in Rs1 and writes the element results to Rd. If the input number is 0x80, this instruction generates
 * 0x7f as the output and sets the OV bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * src = Rs1.B[x];
 * if (src == 0x80) {
 *   src = 0x7f;
 *   OV = 1;
 * } else if (src[7] == 1) {
 *   src = -src;
 * }
 * Rd.B[x] = src;
 * for RV32: x=7...0,
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKABS8(unsigned long long a)
{
    unsigned long long rd;  /* written by the instruction (asm operand %0) */

    /* Issue a single `dkabs8`; `a` is passed in as register operand %1. */
    __ASM volatile(
        "dkabs8 %0, %1"
        : "=r"(rd)
        : "r"(a)
    );

    return rd;
}
/* ===== Inline Function End for DKABS8 ===== */

/* ===== Inline Function Start for DKABS16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N1
 * \brief DKABS16 (64-bit SIMD 16-bit Saturating Absolute)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKABS16 Rd, Rs1
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Get the absolute value of 16-bit signed integer elements simultaneously.
 *
 * **Description**:\n
 * This instruction calculates the absolute value of 16-bit signed integer elements stored
 * in Rs1 and writes the element results to Rd. If the input number is 0x8000, this instruction
 * generates 0x7fff as the output and sets the OV bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * src = Rs1.H[x];
 * if (src == 0x8000) {
 *   src = 0x7fff;
 *   OV = 1;
 * } else if (src[15] == 1) {
 *   src = -src;
 * }
 * Rd.H[x] = src;
 * for RV32: x=3...0,
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKABS16(unsigned long long a)
{
    unsigned long long rd;  /* written by the instruction (asm operand %0) */

    /* Issue a single `dkabs16`; `a` is passed in as register operand %1. */
    __ASM volatile(
        "dkabs16 %0, %1"
        : "=r"(rd)
        : "r"(a)
    );

    return rd;
}
/* ===== Inline Function End for DKABS16 ===== */

/* ===== Inline Function Start for DKSLRA8 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N1
 * \brief DKSLRA8 (64-bit SIMD 8-bit Shift Left Logical with Saturation or Shift Right Arithmetic)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKSLRA8 Rd, Rs1, Rs2
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit elements logical left (positive) or arithmetic right (negative) shift operation with
 * Q7 saturation for the left shift.
 *
 * **Description**:\n
 * The 8-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
 * based on the value of Rs2[3:0]. Rs2[3:0] is in the signed range of [-2^3, 2^3-1]. A positive Rs2[3:0] means
 * logical left shift and a negative Rs2[3:0] means arithmetic right shift. The shift amount is the
 * absolute value of Rs2[3:0]. However, the behavior of `Rs2[3:0]==-2^3 (0x8)` is defined to be
 * equivalent to the behavior of `Rs2[3:0]==-(2^3-1) (0x9)`.
 * The left-shifted results are saturated to the 8-bit signed integer range of [-2^7, 2^7-1].
 * If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:4] will not affect
 * this instruction.
 *
 * **Operations**:\n
 * ~~~
 * if (Rs2[3:0] < 0) {
 *   sa = -Rs2[3:0];
 *   sa = (sa == 8)? 7 : sa;
 *   Rd.B[x] = SE8(Rs1.B[x][7:sa]);
 * } else {
 *   sa = Rs2[2:0];
 *   res[(7+sa):0] = Rs1.B[x] <<(logic) sa;
 *   if (res > (2^7)-1) {
 *     res[7:0] = 0x7f; OV = 1;
 *   } else if (res < -2^7) {
 *     res[7:0] = 0x80; OV = 1;
 *   }
 *   Rd.B[x] = res[7:0];
 * }
 * for RV32: x=7...0,
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b int type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKSLRA8(unsigned long long a, int b)
{
    unsigned long long rd;  /* written by the instruction (asm operand %0) */

    /* Issue a single `dkslra8`; data `a` is operand %1, shift control `b` is operand %2. */
    __ASM volatile(
        "dkslra8 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );

    return rd;
}
/* ===== Inline Function End for DKSLRA8 ===== */

/* ===== Inline Function Start for DKSLRA16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N1
 * \brief DKSLRA16 (64-bit SIMD 16-bit Shift Left Logical with Saturation or Shift Right Arithmetic)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKSLRA16 Rd, Rs1, Rs2
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit elements logical left (positive) or arithmetic right (negative) shift operation with
 * Q15 saturation for the left shift.
 *
 * **Description**:\n
 * The 16-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
 * based on the value of Rs2[4:0]. Rs2[4:0] is in the signed range of [-2^4, 2^4-1]. A positive Rs2[4:0] means
 * logical left shift and a negative Rs2[4:0] means arithmetic right shift. The shift amount is the
 * absolute value of Rs2[4:0]. However, the behavior of `Rs2[4:0]==-2^4 (0x10)` is defined to be
 * equivalent to the behavior of `Rs2[4:0]==-(2^4-1) (0x11)`.
 * The left-shifted results are saturated to the 16-bit signed integer range of [-2^15, 2^15-1].
 * After the shift, saturation, or rounding, the final results are written to
 * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:5] will not affect
 * this instruction.
 *
 * **Operations**:\n
 * ~~~
 * if (Rs2[4:0] < 0) {
 *   sa = -Rs2[4:0];
 *   sa = (sa == 16)? 15 : sa;
 *   Rd.H[x] = SE16(Rs1.H[x][15:sa]);
 * } else {
 *   sa = Rs2[3:0];
 *   res[(15+sa):0] = Rs1.H[x] <<(logic) sa;
 *   if (res > (2^15)-1) {
 *     res[15:0] = 0x7fff; OV = 1;
 *   } else if (res < -2^15) {
 *     res[15:0] = 0x8000; OV = 1;
 *   }
 *   Rd.H[x] = res[15:0];
 * }
 * for RV32: x=3...0,
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b int type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKSLRA16(unsigned long long a, int b)
{
    unsigned long long rd;  /* written by the instruction (asm operand %0) */

    /* Issue a single `dkslra16`; data `a` is operand %1, shift control `b` is operand %2. */
    __ASM volatile(
        "dkslra16 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );

    return rd;
}
/* ===== Inline Function End for DKSLRA16 ===== */

/* ===== Inline Function Start for DKADD8 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N1
 * \brief DKADD8 (64-bit SIMD 8-bit Signed Saturating Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKADD8 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit signed integer element saturating additions simultaneously.
 *
 * **Description**:\n
 * This instruction adds the 8-bit signed integer elements in Rs1 with the 8-bit signed
 * integer elements in Rs2. If any of the results are beyond the Q7 number range (-2^7 <= Q7 <= 2^7-1), they
 * are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rs1.B[x] + Rs2.B[x];
 * if (res[x] > 127) {
 *   res[x] = 127;
 *   OV = 1;
 * } else if (res[x] < -128) {
 *   res[x] = -128;
 *   OV = 1;
 * }
 * Rd.B[x] = res[x];
 * for RV32: x=7...0,
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKADD8(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;  /* written by the instruction (asm operand %0) */

    /* Issue a single `dkadd8`; `a` and `b` are register inputs %1 and %2. */
    __ASM volatile(
        "dkadd8 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );

    return rd;
}
/* ===== Inline Function End for DKADD8 ===== */

/* ===== Inline Function Start for DKADD16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N1
 * \brief DKADD16 (64-bit SIMD 16-bit Signed Saturating Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKADD16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element saturating additions simultaneously.
 *
 * **Description**:\n
 * This instruction adds the 16-bit signed integer elements in Rs1 with the 16-bit signed
 * integer elements in Rs2. If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= 2^15-1),
 * they are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rs1.H[x] + Rs2.H[x];
 * if (res[x] > 32767) {
 *   res[x] = 32767;
 *   OV = 1;
 * } else if (res[x] < -32768) {
 *   res[x] = -32768;
 *   OV = 1;
 * }
 * Rd.H[x] = res[x];
 * for RV32: x=3...0,
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKADD16(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;  /* written by the instruction (asm operand %0) */

    /* Issue a single `dkadd16`; `a` and `b` are register inputs %1 and %2. */
    __ASM volatile(
        "dkadd16 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );

    return rd;
}
/* ===== Inline Function End for DKADD16 ===== */

/* ===== Inline Function Start for DKSUB8 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N1
 * \brief DKSUB8 (64-bit SIMD 8-bit Signed Saturating Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKSUB8 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit signed elements saturating subtractions simultaneously.
 *
 * **Description**:\n
 * This instruction subtracts the 8-bit signed integer elements in Rs2 from the 8-bit
 * signed integer elements in Rs1. If any of the results are beyond the Q7 number range (-2^7 <= Q7 <= 2^7-1),
 * they are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rs1.B[x] - Rs2.B[x];
 * if (res[x] > (2^7)-1) {
 *   res[x] = (2^7)-1;
 *   OV = 1;
 * } else if (res[x] < -2^7) {
 *   res[x] = -2^7;
 *   OV = 1;
 * }
 * Rd.B[x] = res[x];
 * for RV32: x=7...0,
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKSUB8(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;  /* written by the instruction (asm operand %0) */

    /* Issue a single `dksub8`; `a` and `b` are register inputs %1 and %2. */
    __ASM volatile(
        "dksub8 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );

    return rd;
}
/* ===== Inline Function End for DKSUB8 ===== */

/* ===== Inline Function Start for DKSUB16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N1
 * \brief DKSUB16 (64-bit SIMD 16-bit Signed Saturating Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKSUB16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer elements saturating subtractions simultaneously.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit signed integer elements in Rs2 from the 16-bit
 * signed integer elements in Rs1. If any of the results are beyond the Q15 number range (-2^15 <= Q15 <=
 * 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to
 * Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rs1.H[x] - Rs2.H[x];
 * if (res[x] > (2^15)-1) {
 *   res[x] = (2^15)-1;
 *   OV = 1;
 * } else if (res[x] < -2^15) {
 *   res[x] = -2^15;
 *   OV = 1;
 * }
 * Rd.H[x] = res[x];
 * for RV32: x=3...0,
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKSUB16(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;  /* written by the instruction (asm operand %0) */

    /* Issue a single `dksub16`; `a` and `b` are register inputs %1 and %2. */
    __ASM volatile(
        "dksub16 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );

    return rd;
}
/* ===== Inline Function End for DKSUB16 ===== */

/* ===== Inline Function Start for DKHMX8 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief DKHMX8 (64-bit SIMD Signed Crossed Saturating Q7 Multiply)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKHMX8 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do Q7xQ7 element crossed multiplications simultaneously. The Q14 results are then reduced to Q7 numbers again.
 *
 * **Description**:\n
 * For the `DKHMX8` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1 with the
 * bottom 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7
 * content of 16-bit chunks in Rs1 with the top 8-bit Q7 content of 16-bit chunks in Rs2.
 *
 * The Q14 results are then right-shifted 7-bits and saturated into Q7 values. The Q7 results are then
 * written into Rd. When both the two Q7 inputs of a multiplication are 0x80, saturation will happen.
 * The result will be saturated to 0x7F and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.B[x+1]; op2t = Rs2.B[x]; // top
 * op1b = Rs1.B[x]; op2b = Rs2.B[x+1]; // bottom
 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
 *   if (0x80 != aop | 0x80 != bop) {
 *     res = (aop s* bop) >> 7;
 *   } else {
 *     res= 0x7F;
 *     OV = 1;
 *   }
 * }
 * Rd.H[x/2] = concat(rest, resb);
 * for RV32, x=0,2,4,6
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKHMX8(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;  /* written by the instruction (asm operand %0) */

    /* Issue a single `dkhmx8`; `a` and `b` are register inputs %1 and %2. */
    __ASM volatile(
        "dkhmx8 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );

    return rd;
}
/* ===== Inline Function End for DKHMX8 ===== */

/* ===== Inline Function Start for DKHMX16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief DKHMX16 (64-bit SIMD Signed Crossed Saturating Q15 Multiply)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKHMX16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do Q15xQ15 element crossed multiplications simultaneously. The Q30 results are then reduced to Q15 numbers again.
 *
 * **Description**:\n
 * For the `DKHMX16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the
 * bottom 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom 16-bit Q15
 * content of 32-bit chunks in Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2.
 *
 * The Q30 results are then right-shifted 15-bits and saturated into Q15 values. The Q15 results are
 * then written into Rd. When both the two Q15 inputs of a multiplication are 0x8000, saturation will
 * happen. The result will be saturated to 0x7FFF and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.H[x+1]; op2t = Rs2.H[x]; // top
 * op1b = Rs1.H[x]; op2b = Rs2.H[x+1]; // bottom
 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
 *   if (0x8000 != aop | 0x8000 != bop) {
 *     res = (aop s* bop) >> 15;
 *   } else {
 *     res= 0x7FFF;
 *     OV = 1;
 *   }
 * }
 * Rd.W[x/2] = concat(rest, resb);
 * for RV32, x=0,2
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKHMX16(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;  /* written by the instruction (asm operand %0) */

    /* Issue a single `dkhmx16`; `a` and `b` are register inputs %1 and %2. */
    __ASM volatile(
        "dkhmx16 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );

    return rd;
}
/* ===== Inline Function End for DKHMX16 ===== */

/* ===== Inline Function Start for DSMMUL ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief DSMMUL (64-bit MSW 32x32 Signed Multiply)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSMMUL Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do MSW 32x32 element signed multiplications simultaneously. The results are written into Rd.
 *
 * **Description**:\n
 * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2 and writes the
 * most significant 32-bit multiplication results to the corresponding 32-bit elements of Rd. The 32-bit
 * elements of Rs1 and Rs2 are treated as signed integers. The .u form of the instruction rounds up
 * the most significant 32-bit of the 64-bit multiplication results by adding a 1 to bit 31 of the results.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
 *   res = (aop s* bop)[63:32];
 * }
 * Rd = concat(rest, resb);
 * x=0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DSMMUL(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;  /* written by the instruction (asm operand %0) */

    /* Issue a single `dsmmul`; `a` and `b` are register inputs %1 and %2. */
    __ASM volatile(
        "dsmmul %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );

    return rd;
}
/* ===== Inline Function End for DSMMUL ===== */

/* ===== Inline Function Start for DSMMUL.u ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief  DSMMUL.u (64-bit MSW 32x32 Unsigned Multiply)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSMMUL.u Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do MSW 32x32 element unsigned multiplications simultaneously. The results are written into Rd.
 *
 * **Description**:\n
 * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2 and writes the
 * most significant 32-bit multiplication results to the corresponding 32-bit elements of Rd. The 32-bit
 * elements of Rs1 and Rs2 are treated as unsigned integers. The .u form of the instruction rounds up
 * the most significant 32-bit of the 64-bit multiplication results by adding a 1 to bit 31 of the results.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
 *   res = RUND(aop u* bop)[63:32];
 * }
 * Rd = concat(rest, resb);
 * x=0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DSMMUL_U(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;  /* written by the instruction (asm operand %0) */

    /* Issue a single `dsmmul.u`; `a` and `b` are register inputs %1 and %2. */
    __ASM volatile(
        "dsmmul.u %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );

    return rd;
}
/* ===== Inline Function End for DSMMUL.u ===== */

/* ===== Inline Function Start for DKWMMUL ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief   DKWMMUL (64-bit MSW 32x32 Signed Multiply & Double)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKWMMUL Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do MSW 32x32 element signed multiplications simultaneously and double. The results are written into Rd.
 *
 * **Description**:\n
 * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2. It then shifts
 * the multiplication results one bit to the left and takes the most significant 32-bit results. If the
 * shifted result is greater than 2^31-1, it is saturated to 2^31-1 and the OV flag is set to 1. The final element
 * result is written to Rd. The 32-bit elements of Rs1 and Rs2 are treated as signed integers. The .u
 * form of the instruction additionally rounds up the 64-bit multiplication results by adding a 1 to bit
 * 30 before the shift and saturation operations.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
 *     res = sat.q31((aop s* bop) << 1)[63:32];
 * }
 * Rd = concat(rest, resb);
 * x=0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKWMMUL(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;  /* written by the instruction (asm operand %0) */

    /* Issue a single `dkwmmul`; `a` and `b` are register inputs %1 and %2. */
    __ASM volatile(
        "dkwmmul %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );

    return rd;
}
/* ===== Inline Function End for DKWMMUL ===== */

/* ===== Inline Function Start for DKWMMUL.u ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief   DKWMMUL.u (64-bit MSW 32x32 Unsigned Multiply & Double)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKWMMUL.u Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do MSW 32x32 element unsigned multiplications simultaneously and double. The results are written into Rd.
 *
 * **Description**:\n
 * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2. It then shifts
 * the multiplication results one bit to the left and takes the most significant 32-bit results. If the
 * shifted result is greater than 2^31-1, it is saturated to 2^31-1 and the OV flag is set to 1. The final element
 * result is written to Rd. The 32-bit elements of Rs1 and Rs2 are treated as signed integers. The .u
 * form of the instruction additionally rounds up the 64-bit multiplication results by adding a 1 to bit
 * 30 before the shift and saturation operations.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
 *   res = sat.q31(RUND(aop u* bop) << 1)[63:32];
 * }
 * Rd = concat(rest, resb);
 * x=0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKWMMUL_U(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;  /* written by the instruction (asm operand %0) */

    /* Issue a single `dkwmmul.u`; `a` and `b` are register inputs %1 and %2. */
    __ASM volatile(
        "dkwmmul.u %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );

    return rd;
}
/* ===== Inline Function End for DKWMMUL.u ===== */

/* ===== Inline Function Start for DKABS32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief   DKABS32 (64-bit SIMD 32-bit Saturating Absolute)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKABS32 Rd, Rs1
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Get the absolute value of 32-bit signed integer elements simultaneously.
 *
 * **Description**:\n
 * This instruction calculates the absolute value of 32-bit signed integer elements stored in Rs1 and writes the element
 * results to Rd. If the input number is 0x8000_0000, this instruction generates 0x7fff_ffff as the output and sets the OV
 * bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * src = Rs1.W[x];
 * if (src == 0x8000_0000) {
 *   src = 0x7fff_ffff;
 *   OV = 1;
 * } else if (src[31] == 1) {
 *   src = -src;
 * }
 * Rd.W[x] = src;
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKABS32(unsigned long long a)
{
    unsigned long long rd;  /* written by the instruction (asm operand %0) */

    /* Issue a single `dkabs32`; `a` is passed in as register operand %1. */
    __ASM volatile(
        "dkabs32 %0, %1"
        : "=r"(rd)
        : "r"(a)
    );

    return rd;
}
/* ===== Inline Function End for DKABS32 ===== */

/* ===== Inline Function Start for DKSLRA32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief   DKSLRA32 (64-bit SIMD 32-bit Shift Left Logical with Saturation or Shift Right Arithmetic)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKSLRA32 Rd, Rs1, Rs2
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit elements logical left (positive) or arithmetic right (negative) shift operation with Q31 saturation for the left shift.
 *
 * **Description**:\n
 * The 32-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically based on the value of Rs2[5:0].
 * Rs2[5:0] is in the signed range of [-2^5, 2^5-1]. A positive Rs2[5:0] means logical left shift and a negative Rs2[5:0]
 * means arithmetic right shift. The shift amount is the absolute value of Rs2[5:0]. However, the behavior of
 * `Rs2[5:0]==-2^5 (0x20)` is defined to be equivalent to the behavior of `Rs2[5:0]==-(2^5-1) (0x21)`.
 *
 * **Operations**:\n
 * ~~~
 * if (Rs2[5:0] < 0) {
 *   sa = -Rs2[5:0];
 *   sa = (sa == 32)? 31 : sa;
 *   Rd.W[x] = SE32(Rs1.W[x][31:sa]);
 * } else {
 *   sa = Rs2[4:0];
 *   res[(31+sa):0] = Rs1.W[x] <<(logic) sa;
 *   if (res > (2^31)-1) {
 *     res[31:0] = 0x7fff_ffff; OV = 1;
 *   } else if (res < -2^31) {
 *     res[31:0] = 0x8000_0000; OV = 1;
 *   }
 *   Rd.W[x] = res[31:0];
 * }
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b int type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKSLRA32(unsigned long long a, int b)
{
    unsigned long long rd;  /* written by the instruction (asm operand %0) */

    /* Issue a single `dkslra32`; data `a` is operand %1, shift control `b` is operand %2. */
    __ASM volatile(
        "dkslra32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );

    return rd;
}
/* ===== Inline Function End for DKSLRA32 ===== */

/* ===== Inline Function Start for DKADD32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief   DKADD32 (64-bit SIMD 32-bit Signed Saturating Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKADD32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer element saturating additions simultaneously.
 *
 * **Description**:\n
 * This instruction adds the 32-bit signed integer elements in Rs1 with the 32-bit signed integer elements in Rs2. If any
 * of the results are beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), they are saturated to the range and the OV
 * bit is set to 1. The saturated results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rs1.W[x] + Rs2.W[x];
 * if (res[x] > 0x7fff_ffff) {
 *   res[x] = 0x7fff_ffff;
 *   OV = 1;
 * } else if (res[x] < 0x8000_0000) {
 *   res[x] = 0x8000_0000;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKADD32(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;  /* written by the instruction (asm operand %0) */

    /* Issue a single `dkadd32`; `a` and `b` are register inputs %1 and %2. */
    __ASM volatile(
        "dkadd32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );

    return rd;
}
/* ===== Inline Function End for DKADD32 ===== */

/* ===== Inline Function Start for DKSUB32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief   DKSUB32 (64-bit SIMD 32-bit Signed Saturating Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKSUB32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer element saturating subtractions simultaneously.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit signed integer elements in Rs2 from the 32-bit signed integer elements in Rs1. If
 * any of the results are beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), they are saturated to the range and the
 * OV bit is set to 1. The saturated results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rs1.W[x] - Rs2.W[x];
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKSUB32(unsigned long long a, unsigned long long b)
{
    unsigned long long rd; /* receives the instruction's output operand */

    /* Emit dksub32 with a and b as register inputs. */
    __ASM volatile("dksub32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DKSUB32 ===== */

/* ===== Inline Function Start for DRADD16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief   DRADD16 (64-bit SIMD 16-bit Halving Signed Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DRADD16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element additions simultaneously. The results are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the 16-bit signed integer elements in Rs1 with the 16-bit signed integer elements in Rs2. The results
 * are first arithmetically right-shifted by 1 bit and then written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = [(Rs1.H[x]) + (Rs2.H[x])] s>> 1;
 * x=3...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DRADD16(unsigned long long a, unsigned long long b)
{
    unsigned long long rd; /* receives the instruction's output operand */

    /* Emit dradd16 with a and b as register inputs. */
    __ASM volatile("dradd16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DRADD16 ===== */

/* ===== Inline Function Start for DSUB16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief   DSUB16 (64-bit SIMD 16-bit Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSUB16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit integer element subtractions simultaneously.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit integer elements in Rs2 from the 16-bit integer elements in Rs1. The results
 * are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = [(Rs1.H[x]) - (Rs2.H[x])] ;
 * x=3...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DSUB16(unsigned long long a, unsigned long long b)
{
    unsigned long long rd; /* receives the instruction's output operand */

    /* Emit dsub16 with a and b as register inputs. */
    __ASM volatile("dsub16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DSUB16 ===== */

/* ===== Inline Function Start for DRADD32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DRADD32 (64-bit SIMD 32-bit Halving Signed Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DRADD32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer element additions simultaneously. The results are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the 32-bit signed integer elements in Rs1 with the 32-bit signed integer elements in Rs2. The results
 * are first arithmetically right-shifted by 1 bit and then written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = [(Rs1.W[x]) + (Rs2.W[x])] s>> 1;
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DRADD32(unsigned long long a, unsigned long long b)
{
    unsigned long long rd; /* receives the instruction's output operand */

    /* Emit dradd32 with a and b as register inputs. */
    __ASM volatile("dradd32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DRADD32 ===== */

/* ===== Inline Function Start for DSUB32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DSUB32 (64-bit SIMD 32-bit Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSUB32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit integer element subtractions simultaneously.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit signed integer elements in Rs2 from the 32-bit signed integer elements in Rs1 . The
 * results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = [(Rs1.W[x]) - (Rs2.W[x])] ;
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DSUB32(unsigned long long a, unsigned long long b)
{
    unsigned long long rd; /* receives the instruction's output operand */

    /* Emit dsub32 with a and b as register inputs. */
    __ASM volatile("dsub32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DSUB32 ===== */

/* ===== Inline Function Start for DMSR16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DMSR16 (Signed Multiply Halfs with Right Shift 16-bit and Cross Multiply Halfs with Right Shift 16-bit)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DMSR16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications and cross multiplications from the 16-bit elements of two registers; and each multiplications performs a right shift operation.
 *
 * **Description**:\n
 * For the `DMSR16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the top 16-bit Q15 content
 * of 32-bit chunks in Rs2, multiply the bottom 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content
 * of 32-bit chunks in Rs2.
 * At the same time, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content of 32-bit
 * chunks in Rs2 and multiply the bottom 16-bit Q15 content of 32-bit chunks in Rs1 with the top 16-bit Q15 content of 32-bit
 * chunks in Rs2. The Q31 results are then right-shifted 16-bits and clipped to Q15 values. The Q15 results are then written
 * into Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[0] = (Rs1.H[0] s* Rs2.H[0]) s>> 16
 * Rd.H[1] = (Rs1.H[1] s* Rs2.H[1]) s>> 16
 * Rd.H[2] = (Rs1.H[1] s* Rs2.H[0]) s>> 16
 * Rd.H[3] = (Rs1.H[0] s* Rs2.H[1]) s>> 16
 * ~~~
 *
 * \param [in]  a unsigned long type of value stored in a
 * \param [in]  b unsigned long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DMSR16(unsigned long a, unsigned long b)
{
    unsigned long long rd; /* receives the instruction's output operand */

    /* Emit dmsr16 with a and b as register inputs. */
    __ASM volatile("dmsr16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DMSR16 ===== */

/* ===== Inline Function Start for DMSR17 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DMSR17 (Signed Multiply Halfs with Right Shift 17-bit and Cross Multiply Halfs with Right Shift 17-bit)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DMSR17 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications and cross multiplications from the 16-bit elements of two registers;
 * and each multiplications performs a right shift operation.
 *
 * **Description**:\n
 * For the `DMSR17` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the top 16-bit Q15 content
 * of 32-bit chunks in Rs2, multiply the bottom 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content
 * of 32-bit chunks in Rs2.
 * At the same time, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content of 32-bit
 * chunks in Rs2 and multiply the bottom 16-bit Q15 content of 32-bit chunks in Rs1 with the top 16-bit Q15 content of 32-bit
 * chunks in Rs2. The Q31 results are then right-shifted 17-bits and clipped to Q15 values. The Q15 results are then written
 * into Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[0] = (Rs1.H[0] s* Rs2.H[0]) s>> 17
 * Rd.H[1] = (Rs1.H[1] s* Rs2.H[1]) s>> 17
 * Rd.H[2] = (Rs1.H[1] s* Rs2.H[0]) s>> 17
 * Rd.H[3] = (Rs1.H[0] s* Rs2.H[1]) s>> 17
 * ~~~
 *
 * \param [in]  a unsigned long type of value stored in a
 * \param [in]  b unsigned long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DMSR17(unsigned long a, unsigned long b)
{
    unsigned long long rd; /* receives the instruction's output operand */

    /* Emit dmsr17 with a and b as register inputs. */
    __ASM volatile("dmsr17 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DMSR17 ===== */

/* ===== Inline Function Start for DMSR33 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DMSR33 (Signed Multiply with Right Shift 33-bit)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DMSR33 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 32-bit multiplications from the 32-bit elements of two registers, and each multiplications performs a right
 * shift operation.
 *
 * **Description**:\n
 * For the `DMSR33` instruction, multiply the top 32-bit Q31 content of 64-bit chunks in Rs1 with the top 32-bit Q31 content
 * of 64-bit chunks in Rs2. At the same time, multiply the bottom 32-bit Q31 content of 64-bit chunks in Rs1 with the bottom
 * 32-bit Q31 content of 64-bit chunks in Rs2.
 * The Q64 results are then right-shifted 33-bits and clipped to Q31 values. The Q31 results are then written into Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[0] = (Rs1.W[0] s* Rs2.W[0]) s>> 33
 * Rd.W[1] = (Rs1.W[1] s* Rs2.W[1]) s>> 33
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DMSR33(unsigned long long a, unsigned long long b)
{
    unsigned long long rd; /* receives the instruction's output operand */

    /* Emit dmsr33 with a and b as register inputs. */
    __ASM volatile("dmsr33 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DMSR33 ===== */

/* ===== Inline Function Start for DMXSR33 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DMXSR33 (Signed Cross Multiply with Right Shift 33-bit)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DMXSR33 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 32-bit cross multiplications from the 32-bit elements of two registers, and each multiplications performs a
 * right shift operation.
 *
 * **Description**:\n
 * For the `DMXSR33` instruction, multiply the top 32-bit Q31 content of 64-bit chunks in Rs1 with the bottom 32-bit Q31
 * content of 64-bit chunks in Rs2. At the same time, multiply the bottom 32-bit Q31 content of 64-bit chunks in Rs1 with
 * the top 32-bit Q31 content of 64-bit chunks in Rs2.
 * The Q63 results are then right-shifted 33-bits and clipped to Q31 values. The Q31 results are then written into Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[0] = (Rs1.W[0] s* Rs2.W[1]) s>> 33
 * Rd.W[1] = (Rs1.W[1] s* Rs2.W[0]) s>> 33
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DMXSR33(unsigned long long a, unsigned long long b)
{
    unsigned long long rd; /* receives the instruction's output operand */

    /* Emit dmxsr33 with a and b as register inputs. */
    __ASM volatile("dmxsr33 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DMXSR33 ===== */

/* ===== Inline Function Start for DREDAS16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DREDAS16 (Reduced Addition and Reduced Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DREDAS16 Rd, Rs1
 * # Rs1 is an even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do halfs reduced subtraction and halfs reduced addition from a register. The result is written to Rd.
 *
 * **Description**:\n
 * For the `DREDAS16` instruction, subtract the top 16-bit Q15 element from the bottom 16-bit Q15 element of the bottom
 * 32-bit Q31 content of 64-bit chunks in Rs1. At the same time, add the top 16-bit Q15 element with the bottom 16-bit
 * Q15 element of the top 32-bit Q31 content of 64-bit chunks in Rs1. The two Q15 results are then written into Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[0] = Rs1.H[0] - Rs1.H[1]
 * Rd.H[1] = Rs1.H[2] + Rs1.H[3]
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_DREDAS16(unsigned long long a)
{
    unsigned long rd; /* receives the instruction's output operand */

    /* Emit dredas16 with a as its single register input. */
    __ASM volatile("dredas16 %0, %1"
                   : "=r"(rd)
                   : "r"(a));
    return rd;
}
/* ===== Inline Function End for DREDAS16 ===== */

/* ===== Inline Function Start for DREDSA16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DREDSA16 (Reduced Subtraction and Reduced Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DREDSA16 Rd, Rs1
 * # Rs1 is an even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do halfs reduced subtraction and halfs reduced addition from a register. The result is written to Rd.
 *
 * **Description**:\n
 * For the `DREDSA16` instruction, add the top 16-bit Q15 element to the bottom 16-bit Q15 element of the bottom
 * 32-bit Q31 content of 64-bit chunks in Rs1. At the same time, subtract the top 16-bit Q15 element from the bottom
 * 16-bit Q15 element of the top 32-bit Q31 content of 64-bit chunks in Rs1. The two Q15 results are then written into Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[0] = Rs1.H[0] + Rs1.H[1]
 * Rd.H[1] = Rs1.H[2] - Rs1.H[3]
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_DREDSA16(unsigned long long a)
{
    unsigned long rd; /* receives the instruction's output operand */

    /* Emit dredsa16 with a as its single register input. */
    __ASM volatile("dredsa16 %0, %1"
                   : "=r"(rd)
                   : "r"(a));
    return rd;
}
/* ===== Inline Function End for DREDSA16 ===== */

/* ===== Inline Function Start for DKCLIP64 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DKCLIP64 (64-bit Clipped to 16-bit Saturation Value)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKCLIP64 Rd, Rs1
 * # Rs1 is an even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 15-bit element arithmetic right shift operations and limit result into 32-bit int, then do saturate operation to 16-bit and
 * clip result to 16-bit Q15.
 *
 * **Description**:\n
 * For the `DKCLIP64` instruction, shift the input 15 bits to the right and data convert the result to 32-bit int type, after
 * which the input is saturated to limit the data to between 2^15-1 and -2^15. the result is converted to 16-bits q15 type. The
 * final results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * const int32_t max = (int32_t)((1U << 15U) - 1U);
 * const int32_t min = -1 - max ;
 * int32_t val = (int32_t)(Rs s>> 15);
 * if (val > max) {
 *   Rd = max;
 * } else if (val < min) {
 *   Rd = min;
 * } else {
 *   Rd = (int16_t)val;
 * }
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \return value stored in int16_t type
 */
__STATIC_FORCEINLINE int16_t __RV_DKCLIP64(unsigned long long a)
{
    int16_t rd; /* receives the instruction's output operand */

    /* Emit dkclip64 with a as its single register input. */
    __ASM volatile("dkclip64 %0, %1"
                   : "=r"(rd)
                   : "r"(a));
    return rd;
}
/* ===== Inline Function End for DKCLIP64 ===== */

/* ===== Inline Function Start for DKMDA ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DKMDA (Signed Multiply Two Halfs and Add)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKMDA Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then adds the two 32-bit results together.
 * The addition result may be saturated.
 *
 * **Description**:\n
 * This instruction multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of the
 * 32-bit elements of Rs2 and then adds the result to the result of multiplying the top 16-bit content of the 32-bit elements of
 * Rs1 with the top 16-bit content of the 32-bit elements of Rs2.
 * The addition result is checked for saturation. If saturation happens, the result is saturated to 2^31-1. The final results are
 * written to Rd. The 16-bit contents are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * if (Rs1.W[x] != 0x80008000) or (Rs2.W[x] != 0x80008000){
 *   Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[1]) + (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
 * } else {
 *   Rd.W[x] = 0x7fffffff;
 *   OV = 1;
 * }
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKMDA(unsigned long long a, unsigned long long b)
{
    unsigned long long rd; /* receives the instruction's output operand */

    /* Emit dkmda with a and b as register inputs. */
    __ASM volatile("dkmda %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DKMDA ===== */

/* ===== Inline Function Start for DKMXDA ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief   DKMXDA (Signed Crossed Multiply Two Halfs and Add)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKMXDA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then adds the two 32-bit results together.
 * The addition result may be saturated.
 * * DKMXDA: top*bottom + bottom*top (per 32-bit element)
 *
 * **Description**:\n
 * This instruction multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit
 * elements of Rs2 and then adds the result to the result of multiplying the top 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2.
 * The addition result is checked for saturation. If saturation happens, the result is saturated to 2^31-1. The final results are
 * written to Rd. The 16-bit contents are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * if (Rs1.W[x] != 0x80008000) or (Rs2.W[x] != 0x80008000){
 * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) + (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
 * } else {
 * Rd.W[x] = 0x7fffffff;
 * OV = 1;
 * }
 * x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long long type of value stored in a
 * \param [in]  b    unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKMXDA(unsigned long long a, unsigned long long b)
{
    unsigned long long rd; /* receives the instruction's output operand */

    /* Emit dkmxda with a and b as register inputs. */
    __ASM volatile("dkmxda %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DKMXDA ===== */

/* ===== Inline Function Start for DSMDRS ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief   DSMDRS (Signed Multiply Two Halfs and Reverse Subtract)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSMDRS Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then perform a subtraction operation
 * between the two 32-bit results.
 * * DSMDRS: bottom*bottom - top*top (per 32-bit element)
 *
 * **Description**:\n
 * This instruction multiplies the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit
 * elements of Rs2 and then subtracts the result from the result of multiplying the bottom 16-bit content of the 32-bit elements
 * of Rs1 with the bottom 16-bit content of the 32-bit elements of Rs2.
 * The subtraction result is written to the corresponding 32-bit element of Rd (The 16-bit contents of multiplication are
 * treated as signed integers).
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]); x = 1...0
 * ~~~
 *
 * \param [in]  a    unsigned long long type of value stored in a
 * \param [in]  b    unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DSMDRS(unsigned long long a, unsigned long long b)
{
    unsigned long long rd; /* receives the instruction's output operand */

    /* Emit dsmdrs with a and b as register inputs. */
    __ASM volatile("dsmdrs %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DSMDRS ===== */

/* ===== Inline Function Start for DSMXDS ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief   DSMXDS (Signed Crossed Multiply Two Halfs and Subtract)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSMXDS Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then perform a subtraction operation
 * between the two 32-bit results.
 * * DSMXDS: top*bottom - bottom*top (per 32-bit element)
 *
 * **Description**:\n
 * This instruction multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit
 * elements of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of the 32-bit elements
 * of Rs1 with the bottom 16-bit content of the 32-bit elements of Rs2.
 * The subtraction result is written to the corresponding 32-bit element of Rd. The 16-bit contents of multiplication are
 * treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]); x = 1...0
 * ~~~
 *
 * \param [in]  a    unsigned long long type of value stored in a
 * \param [in]  b    unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DSMXDS(unsigned long long a, unsigned long long b)
{
    unsigned long long rd; /* receives the instruction's output operand */

    /* Emit dsmxds with a and b as register inputs. */
    __ASM volatile("dsmxds %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DSMXDS ===== */

/* ===== Inline Function Start for DSMBB32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief   DSMBB32 (Signed Multiply Bottom Word & Bottom Word)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSMBB32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register and write the 64-bit result to a third register.
 * * DSMBB32: bottom*bottom
 *
 * **Description**:\n
 * This instruction multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit element of Rs2. The 64-bit multiplication result is written to Rd.
 * The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * res = (Rs1.W[0] * Rs2.W[0]);
 * Rd = res;
 * ~~~
 *
 * \param [in]  a    unsigned long long type of value stored in a
 * \param [in]  b    unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMBB32(unsigned long long a, unsigned long long b)
{
    long long rd; /* receives the instruction's output operand */

    /* Emit dsmbb32 with a and b as register inputs. */
    __ASM volatile("dsmbb32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DSMBB32 ===== */

/* ===== Inline Function Start for DSMBB32.sra14 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief   DSMBB32.sra14 (Signed Multiply Bottom Word & Bottom Word with Right Shift 14)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSMBB32.sra14 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register, then right shift 14-bit,
 * finally write the 64-bit result to a third register.
 * * DSMBB32.sra14: bottom*bottom s>> 14
 *
 * **Description**:\n
 * This instruction multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit element of Rs2. The 64-bit multiplication result is written to Rd after right shift 14-bit.
 * The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * res = (Rs1.W[0] * Rs2.W[0]) s>> 14;
 * Rd = res;
 * ~~~
 *
 * \param [in]  a    unsigned long long type of value stored in a
 * \param [in]  b    unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMBB32_SRA14(unsigned long long a, unsigned long long b)
{
    long long rd; /* receives the instruction's output operand */

    /* Emit dsmbb32.sra14 with a and b as register inputs. */
    __ASM volatile("dsmbb32.sra14 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DSMBB32.sra14 ===== */

/* ===== Inline Function Start for DSMBB32.sra32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief   DSMBB32.sra32 (Signed Multiply Bottom Word & Bottom Word with Right Shift 32)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSMBB32.sra32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register, then right shift 32-bit,
 * finally write the 64-bit result to a third register.
 * * DSMBB32.sra32: bottom*bottom s >> 32
 *
 * **Description**:\n
 * This instruction multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit element of Rs2.
 * The 64-bit multiplication result is written to Rd after right shift 32-bit. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * res = (Rs1.W[0] * Rs2.W[0]) s>> 32;
 * Rd = res;
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMBB32_SRA32(unsigned long long a, unsigned long long b)
{
    long long rd; /* receives the instruction's output operand */

    /* Emit dsmbb32.sra32 with a and b as register inputs. */
    __ASM volatile("dsmbb32.sra32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DSMBB32.sra32 ===== */

/* ===== Inline Function Start for DSMBT32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DSMBT32 (Signed Multiply Bottom Word & Top Word)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSMBT32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register and write the 64-bit
 * result to a third register.
 * * DSMBT32: bottom*top
 *
 * **Description**:\n
 * This instruction multiplies the bottom 32-bit element of Rs1 with the top 32-bit element of Rs2. The 64-bit multiplication
 * result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * res = (Rs1.W[0] * Rs2.W[1]);
 * Rd = res;
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMBT32(unsigned long long a, unsigned long long b)
{
    long long rd; /* receives the instruction's output operand */

    /* Emit dsmbt32 with a and b as register inputs. */
    __ASM volatile("dsmbt32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DSMBT32 ===== */

/* ===== Inline Function Start for DSMBT32.sra14 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DSMBT32.sra14 (Signed Multiply Bottom Word & Top Word with Right Shift 14)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSMBT32.sra14 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register, then right shift 14-bit,
 * finally write the 64-bit result to a third register.
 * * DSMBT32.sra14: bottom*top s>> 14
 *
 * **Description**:\n
 * This instruction multiplies the bottom 32-bit element of Rs1 with the top 32-bit element of Rs2. The 64-bit multiplication
 * result is written to Rd after right shift 14-bit. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * res = (Rs1.W[0] * Rs2.W[1]) s>> 14;
 * Rd = res;
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMBT32_SRA14(unsigned long long a, unsigned long long b)
{
    long long rd; /* receives the instruction's output operand */

    /* Emit dsmbt32.sra14 with a and b as register inputs. */
    __ASM volatile("dsmbt32.sra14 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DSMBT32.sra14 ===== */

/* ===== Inline Function Start for DSMBT32.sra32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DSMBT32.sra32 (Signed Multiply Bottom Word & Top Word with Right Shift 32)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSMBT32.sra32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register, then right shift 32-bit,
 * finally write the 64-bit result to a third register.
 * * DSMBT32.sra32: bottom*top s>> 32
 *
 * **Description**:\n
 * This instruction multiplies the bottom 32-bit element of Rs1 with the top 32-bit element of Rs2. The 64-bit multiplication
 * result is written to Rd after right shift 32-bit. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * res = (Rs1.W[0] * Rs2.W[1]) s>> 32;
 * Rd = res;
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMBT32_SRA32(unsigned long long a, unsigned long long b)
{
    long long rd; /* receives the instruction's output operand */

    /* Emit dsmbt32.sra32 with a and b as register inputs. */
    __ASM volatile("dsmbt32.sra32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DSMBT32.sra32 ===== */

/* ===== Inline Function Start for DSMTT32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DSMTT32 (Signed Multiply Top Word & Top Word)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSMTT32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register and write the 64-bit
 * result to a third register.
 * * DSMTT32: top*top
 *
 * **Description**:\n
 * This instruction multiplies the top 32-bit element of Rs1 with the top 32-bit element of Rs2. The 64-bit multiplication
 * result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * res = Rs1.W[1] * Rs2.W[1];
 * Rd = res;
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMTT32(unsigned long long a, unsigned long long b)
{
    long long product;

    /* a and b are passed as register inputs (%1, %2); %0 is written into product. */
    __ASM volatile("dsmtt32 %0, %1, %2"
                   : "=r"(product)
                   : "r"(a), "r"(b));
    return product;
}
/* ===== Inline Function End for DSMTT32 ===== */

/* ===== Inline Function Start for DSMTT32.sra14 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DSMTT32.sra14 (Signed Multiply Top Word & Top Word with Right Shift 14-bit)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSMTT32.sra14 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register,then right shift 14-bit,
 * finally write the 64-bit result to a third register.
 * * DSMTT32.sra14: top*top s>> 14
 *
 * **Description**:\n
 * This instruction multiplies the top 32-bit element of Rs1 with the top 32-bit element of Rs2. The 64-bit multiplication
 * result is written to Rd after right shift 14-bit. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * res = (Rs1.W[1] * Rs2.W[1]) s>> 14;
 * Rd = res;
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMTT32_SRA14(unsigned long long a, unsigned long long b)
{
    long long rd;

    /* Operand map: %0 = rd (register output), %1 = a, %2 = b (register inputs). */
    __ASM volatile("dsmtt32.sra14 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DSMTT32.sra14 ===== */

/* ===== Inline Function Start for DSMTT32.sra32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DSMTT32.sra32 (Signed Multiply Top Word & Top Word with Right Shift 32-bit)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSMTT32.sra32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register,then right shift 32-bit,
 * finally write the 64-bit result to a third register.
 * * DSMTT32.sra32: top*top s>> 32
 *
 * **Description**:\n
 * This instruction multiplies the top 32-bit element of Rs1 with the top 32-bit element of Rs2. The 64-bit multiplication
 * result is written to Rd after right shift 32-bit. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * res = (Rs1.W[1] * Rs2.W[1]) s>> 32;
 * Rd = res;
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMTT32_SRA32(unsigned long long a, unsigned long long b)
{
    long long rd;

    /* Operand map: %0 = rd (register output), %1 = a, %2 = b (register inputs). */
    __ASM volatile("dsmtt32.sra32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DSMTT32.sra32 ===== */

/* ===== Inline Function Start for DPKBB32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DPKBB32 (Pack Two 32-bit Data from Both Bottom Half)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DPKBB32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Pack 32-bit data from 64-bit chunks in two registers.
 * * DPKBB32: bottom.bottom
 *
 * **Description**:\n
 * This instruction moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
 *
 * **Operations**:\n
 * ~~~
 * Rd = CONCAT(Rs1.W[0], Rs2.W[0]);
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DPKBB32(unsigned long long a, unsigned long long b)
{
    unsigned long long packed;

    /* a and b are passed as register inputs (%1, %2); %0 is written into packed. */
    __ASM volatile("dpkbb32 %0, %1, %2"
                   : "=r"(packed)
                   : "r"(a), "r"(b));
    return packed;
}
/* ===== Inline Function End for DPKBB32 ===== */

/* ===== Inline Function Start for DPKBT32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DPKBT32 (Pack Two 32-bit Data from Bottom and Top Half)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DPKBT32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Pack 32-bit data from 64-bit chunks in two registers.
 * * DPKBT32: bottom.top
 *
 * **Description**:\n
 * This instruction moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
 *
 * **Operations**:\n
 * ~~~
 * Rd = CONCAT(Rs1.W[0], Rs2.W[1]);
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DPKBT32(unsigned long long a, unsigned long long b)
{
    unsigned long long packed;

    /* a and b are passed as register inputs (%1, %2); %0 is written into packed. */
    __ASM volatile("dpkbt32 %0, %1, %2"
                   : "=r"(packed)
                   : "r"(a), "r"(b));
    return packed;
}
/* ===== Inline Function End for DPKBT32 ===== */

/* ===== Inline Function Start for DPKTT32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DPKTT32 (Pack Two 32-bit Data from Both Top Half)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DPKTT32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Pack 32-bit data from 64-bit chunks in two registers.
 * * DPKTT32: top.top
 *
 * **Description**:\n
 * This instruction moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
 *
 * **Operations**:\n
 * ~~~
 * Rd = CONCAT(Rs1.W[1], Rs2.W[1]);
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DPKTT32(unsigned long long a, unsigned long long b)
{
    unsigned long long packed;

    /* a and b are passed as register inputs (%1, %2); %0 is written into packed. */
    __ASM volatile("dpktt32 %0, %1, %2"
                   : "=r"(packed)
                   : "r"(a), "r"(b));
    return packed;
}
/* ===== Inline Function End for DPKTT32 ===== */

/* ===== Inline Function Start for DPKTB32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DPKTB32 (Pack Two 32-bit Data from Top and Bottom Half)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DPKTB32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Pack 32-bit data from 64-bit chunks in two registers.
 * * DPKTB32: top.bottom
 *
 * **Description**:\n
 * This instruction moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
 *
 * **Operations**:\n
 * ~~~
 * Rd = CONCAT(Rs1.W[1], Rs2.W[0]);
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DPKTB32(unsigned long long a, unsigned long long b)
{
    unsigned long long packed;

    /* a and b are passed as register inputs (%1, %2); %0 is written into packed. */
    __ASM volatile("dpktb32 %0, %1, %2"
                   : "=r"(packed)
                   : "r"(a), "r"(b));
    return packed;
}
/* ===== Inline Function End for DPKTB32 ===== */

/* ===== Inline Function Start for DPKTB16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DPKTB16 (Pack Two 16-bit Data from Top and Bottom Half)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DPKTB16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Pack 16-bit data from 32-bit chunks in two registers.
 * * DPKTB16: top.bottom
 *
 * **Description**:\n
 * This instruction moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0].
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][15:0]);
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DPKTB16(unsigned long long a, unsigned long long b)
{
    unsigned long long packed;

    /* a and b are passed as register inputs (%1, %2); %0 is written into packed. */
    __ASM volatile("dpktb16 %0, %1, %2"
                   : "=r"(packed)
                   : "r"(a), "r"(b));
    return packed;
}
/* ===== Inline Function End for DPKTB16 ===== */

/* ===== Inline Function Start for DPKBB16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DPKBB16 (Pack Two 16-bit Data from Both Bottom Half)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DPKBB16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Pack 16-bit data from 32-bit chunks in two registers.
 * * DPKBB16: bottom.bottom
 *
 * **Description**:\n
 * This instruction moves Rs1.W[x][15:0] to Rd.W[x][31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0].
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][15:0]);
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DPKBB16(unsigned long long a, unsigned long long b)
{
    unsigned long long packed;

    /* a and b are passed as register inputs (%1, %2); %0 is written into packed. */
    __ASM volatile("dpkbb16 %0, %1, %2"
                   : "=r"(packed)
                   : "r"(a), "r"(b));
    return packed;
}
/* ===== Inline Function End for DPKBB16 ===== */

/* ===== Inline Function Start for DPKBT16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DPKBT16 (Pack Two 16-bit Data from Bottom and Top Half)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DPKBT16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Pack 16-bit data from 32-bit chunks in two registers.
 * * DPKBT16: bottom.top
 *
 * **Description**:\n
 * This instruction moves Rs1.W[x] [15:0] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][31:16]);
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DPKBT16(unsigned long long a, unsigned long long b)
{
    unsigned long long packed;

    /* a and b are passed as register inputs (%1, %2); %0 is written into packed. */
    __ASM volatile("dpkbt16 %0, %1, %2"
                   : "=r"(packed)
                   : "r"(a), "r"(b));
    return packed;
}
/* ===== Inline Function End for DPKBT16 ===== */

/* ===== Inline Function Start for DPKTT16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DPKTT16 (Pack Two 16-bit Data from Both Top Half)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DPKTT16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Pack 16-bit data from 32-bit chunks in two registers.
 * * DPKTT16: top.top
 *
 * **Description**:\n
 * This instruction moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][31:16]);
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DPKTT16(unsigned long long a, unsigned long long b)
{
    unsigned long long packed;

    /* a and b are passed as register inputs (%1, %2); %0 is written into packed. */
    __ASM volatile("dpktt16 %0, %1, %2"
                   : "=r"(packed)
                   : "r"(a), "r"(b));
    return packed;
}
/* ===== Inline Function End for DPKTT16 ===== */

/* ===== Inline Function Start for DSRA16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DSRA16 (SIMD 16-bit Shift Right Arithmetic)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSRA16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit element arithmetic right shift operations simultaneously. The shift amount is a variable from a GPR.
 *
 * **Description**:\n
 * The 16-bit data elements in Rs1 are right-shifted arithmetically, that is, the vacated high-order bits are filled with the
 * sign-bit of each data element. The shift amount is specified by the low-order 4 bits of the value in the Rs2 register, and
 * the results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = Rs2[3:0];
 * if (sa != 0)
 * {
 * Rd.H[x] = SE16(Rs1.H[x][15:sa]);
 * } else {
 * Rd = Rs1;
 * }
 * x=3...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DSRA16(unsigned long long a, unsigned long b)
{
    unsigned long long shifted;

    /* %1 = a (64-bit data operand), %2 = b (unsigned long operand); %0 is written into shifted. */
    __ASM volatile("dsra16 %0, %1, %2"
                   : "=r"(shifted)
                   : "r"(a), "r"(b));
    return shifted;
}
/* ===== Inline Function End for DSRA16 ===== */

/* ===== Inline Function Start for DADD16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DADD16 (16-bit Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DADD16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit integer element additions simultaneously.
 *
 * **Description**:\n
 * This instruction adds the 16-bit unsigned integer elements in Rs1 with the 16-bit unsigned integer elements in Rs2. And
 * the results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = Rs1.H[x] + Rs2.H[x];
 * x=3...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DADD16(unsigned long long a, unsigned long long b)
{
    unsigned long long sum;

    /* a and b are passed as register inputs (%1, %2); %0 is written into sum. */
    __ASM volatile("dadd16 %0, %1, %2"
                   : "=r"(sum)
                   : "r"(a), "r"(b));
    return sum;
}
/* ===== Inline Function End for DADD16 ===== */

/* ===== Inline Function Start for DADD32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DADD32 (32-bit Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DADD32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit integer element additions simultaneously.
 *
 * **Description**:\n
 * This instruction adds the 32-bit integer elements in Rs1 with the 32-bit integer elements in Rs2, and then writes the 32-bit
 * element results to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = Rs1.W[x] + Rs2.W[x];
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DADD32(unsigned long long a, unsigned long long b)
{
    unsigned long long sum;

    /* a and b are passed as register inputs (%1, %2); %0 is written into sum. */
    __ASM volatile("dadd32 %0, %1, %2"
                   : "=r"(sum)
                   : "r"(a), "r"(b));
    return sum;
}
/* ===== Inline Function End for DADD32 ===== */

/* ===== Inline Function Start for DSMBB16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DSMBB16 (Signed Multiply Bottom Half & Bottom Half)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSMBB16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 16-bit content of the 32-bit elements of a register with the signed 16-bit content of the 32-bit elements
 * of another register and write the result to a third register.
 * * DSMBB16: W[x].bottom*W[x].bottom
 *
 * **Description**:\n
 * For the `DSMBB16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom
 * 16-bit content of the 32-bit elements of Rs2.
 * The multiplication results are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[0];
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DSMBB16(unsigned long long a, unsigned long long b) /* pass */
{
    unsigned long long products;

    /* a and b are passed as register inputs (%1, %2); %0 is written into products. */
    __ASM volatile("dsmbb16 %0, %1, %2"
                   : "=r"(products)
                   : "r"(a), "r"(b));
    return products;
}
/* ===== Inline Function End for DSMBB16 ===== */

/* ===== Inline Function Start for DSMBT16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DSMBT16 (Signed Multiply Bottom Half & Top Half)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSMBT16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 16-bit content of the 32-bit elements of a register with the signed 16-bit content of the 32-bit
 * elements of another register and write the result to a third register.
 * * DSMBT16: W[x].bottom *W[x].top
 *
 * **Description**:\n
 * For the `DSMBT16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with the top 16-bit
 * content of the 32-bit elements of Rs2.
 * The multiplication results are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[1];
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DSMBT16(unsigned long long a, unsigned long long b) /* pass */
{
    unsigned long long products;

    /* a and b are passed as register inputs (%1, %2); %0 is written into products. */
    __ASM volatile("dsmbt16 %0, %1, %2"
                   : "=r"(products)
                   : "r"(a), "r"(b));
    return products;
}
/* ===== Inline Function End for DSMBT16 ===== */

/* ===== Inline Function Start for DSMTT16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DSMTT16 (Signed Multiply Top Half & Top Half)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSMTT16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 16-bit content of the 32-bit elements of a register with the signed 16-bit content of the 32-bit
 * elements of another register and write the result to a third register.
 * * DSMTT16: W[x].top * W[x].top
 *
 * **Description**:\n
 * For the `DSMTT16` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit
 * content of the 32-bit elements of Rs2.
 * The multiplication results are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = Rs1.W[x].H[1] * Rs2.W[x].H[1];
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DSMTT16(unsigned long long a, unsigned long long b)
{
    unsigned long long products;

    /* a and b are passed as register inputs (%1, %2); %0 is written into products. */
    __ASM volatile("dsmtt16 %0, %1, %2"
                   : "=r"(products)
                   : "r"(a), "r"(b));
    return products;
}
/* ===== Inline Function End for DSMTT16 ===== */

/* ===== Inline Function Start for DRCRSA16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DRCRSA16 (16-bit Signed Halving Cross Subtraction & Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DRCRSA16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element subtraction and 16-bit signed integer element addition in a 32-bit chunk simultaneously.
 * Operands are from crossed positions in 32-bit chunks. The results are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit signed integer in [15:0] of 32-bit chunks in Rs2 from the 16-bit signed integer in
 * [31:16] of 32-bit chunks in Rs1, and adds the 16-bit signed integer in [31:16] of 32-bit chunks in Rs2 to the 16-bit signed
 * integer in [15:0] of 32-bit chunks in Rs1. The element results are first arithmetically right-shifted by 1 bit and then
 * written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of 32-bit chunks in Rd for addition.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:16] = (Rs1.W[x][31:16] - Rs2.W[x][15:0]) s>> 1;
 * Rd.W[x][15:0] = (Rs1.W[x][15:0] + Rs2.W[x][31:16]) s>> 1;
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DRCRSA16(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;

    /* Operand map: %0 = rd (register output), %1 = a, %2 = b (register inputs). */
    __ASM volatile("drcrsa16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DRCRSA16 ===== */

/* ===== Inline Function Start for DRCRSA32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DRCRSA32 (32-bit Signed Halving Cross Subtraction & Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DRCRSA32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer element subtraction and 32-bit signed integer element addition in a 64-bit chunk simultaneously.
 * Operands are from crossed 32-bit elements. The results are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit signed integer element in [31:0] of Rs2 from the 32-bit signed integer element in
 * [63:32] of Rs1, and adds the 32-bit signed integer element in [63:32] of Rs2 to the 32-bit signed integer element in [31:0]
 * of Rs1. The element results are first arithmetically right-shifted by 1 bit and then written to [63:32] of Rd for
 * subtraction and [31:0] of Rd for addition.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[1] = (Rs1.W[1] - Rs2.W[0]) s>> 1;
 * Rd.W[0] = (Rs1.W[0] + Rs2.W[1]) s>> 1;
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DRCRSA32(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;

    /* Operand map: %0 = rd (register output), %1 = a, %2 = b (register inputs). */
    __ASM volatile("drcrsa32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DRCRSA32 ===== */

/* ===== Inline Function Start for DRCRAS16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DRCRAS16 (16-bit Signed Halving Cross Addition & Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DRCRAS16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element addition and 16-bit signed integer element subtraction in a 32-bit chunk simultaneously.
 * Operands are from crossed positions in 32-bit chunks. The results are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the 16-bit signed integer in [31:16] of 32-bit chunks in Rs1 with the 16-bit signed integer in
 * [15:0] of 32-bit chunks in Rs2, and subtracts the 16-bit signed integer in [31:16] of 32-bit chunks in Rs2 from the 16-bit
 * signed integer in [15:0] of 32-bit chunks in Rs1. The element results are first arithmetically right-shifted by 1 bit and
 * then written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit chunks in Rd for subtraction.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:16] = (Rs1.W[x][31:16] + Rs2.W[x][15:0]) s>> 1;
 * Rd.W[x][15:0] = (Rs1.W[x][15:0] - Rs2.W[x][31:16]) s>> 1;
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DRCRAS16(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;

    /* Operand map: %0 = rd (register output), %1 = a, %2 = b (register inputs). */
    __ASM volatile("drcras16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DRCRAS16 ===== */

/* ===== Inline Function Start for DRCRAS32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DRCRAS32 (32-bit Signed Halving Cross Addition & Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DRCRAS32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer element addition and 32-bit signed integer element subtraction in a 64-bit chunk simultaneously.
 * Operands are from crossed 32-bit elements. The results are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the 32-bit signed integer element in [63:32] of Rs1 with the 32-bit signed integer element in [31:0]
 * of Rs2, and subtracts the 32-bit signed integer element in [63:32] of Rs2 from the 32-bit signed integer element in [31:0]
 * of Rs1. The element results are first arithmetically right-shifted by 1 bit and then written to [63:32] of Rd for addition
 * and [31:0] of Rd for subtraction.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[1] = (Rs1.W[1] + Rs2.W[0]) s>> 1;
 * Rd.W[0] = (Rs1.W[0] - Rs2.W[1]) s>> 1;
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DRCRAS32(unsigned long long a, unsigned long long b)
{
    unsigned long long result;
    /*
     * The mnemonic is spelled in lower case, like the neighbouring intrinsics,
     * so this wrapper does not depend on the assembler accepting upper-case
     * opcodes. Operand map: %0 = result (register output), %1 = a, %2 = b
     * (register inputs).
     */
    __ASM volatile("drcras32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
    return result;
}
/* ===== Inline Function End for DRCRAS32 ===== */

/* ===== Inline Function Start for DKCRAS16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DKCRAS16 (16-bit Signed Saturating Cross Addition & Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKCRAS16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element saturating addition and 16-bit signed integer element saturating subtraction in a 32-bit
 * chunk simultaneously. Operands are from crossed positions in 32-bit chunks.
 *
 * **Description**:\n
 * This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1 with the 16-bit signed integer
 * element in [15:0] of 32-bit chunks in Rs2; at the same time, it subtracts the 16-bit signed integer element in [31:16] of
 * 32-bit chunks in Rs2 from the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs1.
 * If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV
 * bit is set to 1. The saturated results are written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit chunks
 * in Rd for subtraction.
 *
 * **Operations**:\n
 * ~~~
 * res1 = Rs1.W[x][31:16] + Rs2.W[x][15:0];
 * res2 = Rs1.W[x][15:0] - Rs2.W[x][31:16];
 * for (res in [res1, res2]) {
 *   if (res > (2^15)-1) {
 *     res = (2^15)-1;
 *     OV = 1;
 *   } else if (res < -2^15) {
 *     res = -2^15;
 *     OV = 1;
 *   }
 * }
 * Rd.W[x][31:16] = res1;
 * Rd.W[x][15:0] = res2;
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKCRAS16(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;

    /* Operand map: %0 = rd (register output), %1 = a, %2 = b (register inputs). */
    __ASM volatile("dkcras16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DKCRAS16 ===== */

/* ===== Inline Function Start for DKCRSA16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DKCRSA16 (16-bit Signed Saturating Cross Subtraction & Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKCRSA16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element saturating subtraction and 16-bit signed integer element saturating addition in a 32-bit
 * chunk simultaneously. Operands are from crossed positions in 32-bit chunks.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs2 from the 16-bit signed integer
 * element in [31:16] of 32-bit chunks in Rs1; at the same time, it adds the 16-bit signed integer element in [31:16] of 32-bit
 * chunks in Rs2 with the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs1.
 * If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV
 * bit is set to 1. The saturated results are written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of 32-bit chunks
 * in Rd for addition.
 *
 * **Operations**:\n
 * ~~~
 * res1 = Rs1.W[x][31:16] - Rs2.W[x][15:0];
 * res2 = Rs1.W[x][15:0] + Rs2.W[x][31:16];
 * for (res in [res1, res2]) {
 *   if (res > (2^15)-1) {
 *     res = (2^15)-1;
 *     OV = 1;
 *   } else if (res < -2^15) {
 *     res = -2^15;
 *     OV = 1;
 *   }
 * }
 * Rd.W[x][31:16] = res1;
 * Rd.W[x][15:0] = res2;
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKCRSA16(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;

    /* Operand map: %0 = rd (register output), %1 = a, %2 = b (register inputs). */
    __ASM volatile("dkcrsa16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DKCRSA16 ===== */

/* ===== Inline Function Start for DRSUB16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DRSUB16 (16-bit Signed Halving Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DRSUB16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element subtractions simultaneously. The results are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit signed integer elements in Rs2 from the 16-bit signed integer elements in Rs1. The
 * results are first arithmetically right-shifted by 1 bit and then written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = (Rs1.H[x] - Rs2.H[x]) s>> 1;
 * x=3...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DRSUB16(unsigned long long a, unsigned long long b)
{
    unsigned long long diff;

    /* a and b are passed as register inputs (%1, %2); %0 is written into diff. */
    __ASM volatile("drsub16 %0, %1, %2"
                   : "=r"(diff)
                   : "r"(a), "r"(b));
    return diff;
}
/* ===== Inline Function End for DRSUB16 ===== */

/* ===== Inline Function Start for DSTSA32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DSTSA32 (32-bit Straight Subtraction & Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSTSA32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit integer element subtraction and 32-bit integer element addition in a 64-bit chunk simultaneously. Operands are
 * from corresponding 32-bit elements.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit integer element in [63:32] of Rs2 from the 32-bit integer element in [63:32] of Rs1,
 * and writes the result to [63:32] of Rd; at the same time, it adds the 32-bit integer element in [31:0] of Rs1 with the 32-bit
 * integer element in [31:0] of Rs2, and writes the result to [31:0] of Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[1] = Rs1.W[1] - Rs2.W[1];
 * Rd.W[0] = Rs1.W[0] + Rs2.W[0];
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DSTSA32(unsigned long long a, unsigned long long b)
{
    unsigned long long rd; /* destination operand (%0) of the instruction */

    /* Issue dstsa32 with a and b as the two source operands (%1, %2). */
    __ASM volatile("dstsa32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DSTSA32 ===== */

/* ===== Inline Function Start for DSTAS32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DSTAS32 (SIMD 32-bit Straight Addition & Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSTAS32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit integer element addition and 32-bit integer element subtraction in a 64-bit chunk simultaneously. Operands are
 * from corresponding 32-bit elements.
 *
 * **Description**:\n
 * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit integer element in [63:32] of Rs2,
 * and writes the result to [63:32] of Rd; at the same time, it subtracts the 32-bit integer element in [31:0] of Rs2
 * from the 32-bit integer element in [31:0] of Rs1, and writes the result to [31:0] of Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[1] = Rs1.W[1] + Rs2.W[1];
 * Rd.W[0] = Rs1.W[0] - Rs2.W[0];
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DSTAS32(unsigned long long a, unsigned long long b)
{
    unsigned long long result; /* destination operand (%0) of the instruction */

    /*
     * The mnemonic is spelled in lower case, like every other intrinsic in
     * this file; an upper-case spelling may be rejected by assemblers that
     * match mnemonics case-sensitively.
     */
    __ASM volatile("dstas32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
    return result;
}
/* ===== Inline Function End for DSTAS32 ===== */

/* ===== Inline Function Start for DKCRSA32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DKCRSA32 (32-bit Signed Saturating Cross Subtraction & Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKCRSA32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer element saturating subtraction and 32-bit signed integer element saturating addition in a 64-bit
 * chunk simultaneously. Operands are from crossed 32-bit elements.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit integer element in [31:0] of Rs2 from the 32-bit integer element in [63:32] of Rs1; at
 * the same time, it adds the 32-bit integer element in [31:0] of Rs1 with the 32-bit integer element in [63:32] of Rs2. If any
 * of the results are beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), they are saturated to the range and the OV bit is
 * set to 1. The saturated results are written to [63:32] of Rd for subtraction and [31:0] of Rd for addition.
 *
 * **Operations**:\n
 * ~~~
 * res[1] = Rs1.W[1] - Rs2.W[0];
 * res[0] = Rs1.W[0] + Rs2.W[1];
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[1] = res[1];
 * Rd.W[0] = res[0];
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKCRSA32(unsigned long long a, unsigned long long b)
{
    unsigned long long rd; /* destination operand (%0) of the instruction */

    /* Issue dkcrsa32 with a and b as the two source operands (%1, %2). */
    __ASM volatile("dkcrsa32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DKCRSA32 ===== */

/* ===== Inline Function Start for DKCRAS32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DKCRAS32 (32-bit Signed Saturating Cross Addition & Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKCRAS32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer element saturating addition and 32-bit signed integer element saturating subtraction in a 64-bit
 * chunk simultaneously. Operands are from crossed 32-bit elements.
 *
 * **Description**:\n
 * This instruction adds the 32-bit integer element in [31:0] of Rs2 with the 32-bit integer element in [63:32] of Rs1; at the
 * same time, it subtracts the 32-bit integer element in [63:32] of Rs2 from the 32-bit integer element in [31:0] of Rs1. If any
 * of the results are beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), they are saturated to the range and the OV bit is
 * set to 1. The saturated results are written to [63:32] of Rd for addition and [31:0] of Rd for subtraction.
 *
 * **Operations**:\n
 * ~~~
 * res[1] = Rs1.W[1] + Rs2.W[0];
 * res[0] = Rs1.W[0] - Rs2.W[1];
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[1] = res[1];
 * Rd.W[0] = res[0];
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKCRAS32(unsigned long long a, unsigned long long b)
{
    unsigned long long rd; /* destination operand (%0) of the instruction */

    /* Issue dkcras32 with a and b as the two source operands (%1, %2). */
    __ASM volatile("dkcras32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DKCRAS32 ===== */

/* ===== Inline Function Start for DCRSA32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DCRSA32 (32-bit Cross Subtraction & Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DCRSA32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit integer element subtraction and 32-bit integer element addition in a 64-bit chunk simultaneously. Operands are
 * from crossed 32-bit elements.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit integer element in [31:0] of Rs2 from the 32-bit integer element in [63:32] of Rs1, and
 * writes the result to [63:32] of Rd; at the same time, it adds the 32-bit integer element in [31:0] of Rs1 with the 32-bit
 * integer element in [63:32] of Rs2, and writes the result to [31:0] of Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[1] = Rs1.W[1] - Rs2.W[0];
 * Rd.W[0] = Rs1.W[0] + Rs2.W[1];
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DCRSA32(unsigned long long a, unsigned long long b)
{
    unsigned long long rd; /* destination operand (%0) of the instruction */

    /* Issue dcrsa32 with a and b as the two source operands (%1, %2). */
    __ASM volatile("dcrsa32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DCRSA32 ===== */

/* ===== Inline Function Start for DCRAS32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DCRAS32 (32-bit Cross Addition & Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DCRAS32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit integer element addition and 32-bit integer element subtraction in a 64-bit chunk simultaneously. Operands are
 * from crossed 32-bit elements.
 *
 * **Description**:\n
 * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit integer element in [31:0] of Rs2, and
 * writes the result to [63:32] of Rd; at the same time, it subtracts the 32-bit integer element in [63:32] of Rs2 from the 32-bit
 * integer element in [31:0] of Rs1, and writes the result to [31:0] of Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[1] = Rs1.W[1] + Rs2.W[0];
 * Rd.W[0] = Rs1.W[0] - Rs2.W[1];
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DCRAS32(unsigned long long a, unsigned long long b)
{
    unsigned long long rd; /* destination operand (%0) of the instruction */

    /* Issue dcras32 with a and b as the two source operands (%1, %2). */
    __ASM volatile("dcras32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DCRAS32 ===== */

/* ===== Inline Function Start for DKSTSA16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DKSTSA16 (16-bit Signed Saturating Straight Subtraction & Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKSTSA16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element saturating subtraction and 16-bit signed integer element saturating addition in a 32-bit
 * chunk simultaneously. Operands are from corresponding positions in 32-bit chunks.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs2 from the 16-bit signed integer
 * element in [31:16] of 32-bit chunks in Rs1; at the same time, it adds the 16-bit signed integer element in [15:0] of 32-bit
 * chunks in Rs2 with the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs1.
 * If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV
 * bit is set to 1. The saturated results are written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of 32-bit chunks
 * in Rd for addition.
 *
 * **Operations**:\n
 * ~~~
 * res1 = Rs1.W[x][31:16] - Rs2.W[x][31:16];
 * res2 = Rs1.W[x][15:0] + Rs2.W[x][15:0];
 * for (res in [res1, res2]) {
 *   if (res > (2^15)-1) {
 *     res = (2^15)-1;
 *     OV = 1;
 *   } else if (res < -2^15) {
 *     res = -2^15;
 *     OV = 1;
 *   }
 * }
 * Rd.W[x][31:16] = res1;
 * Rd.W[x][15:0] = res2;
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKSTSA16(unsigned long long a, unsigned long long b)
{
    unsigned long long rd; /* destination operand (%0) of the instruction */

    /* Issue dkstsa16 with a and b as the two source operands (%1, %2). */
    __ASM volatile("dkstsa16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DKSTSA16 ===== */

/* ===== Inline Function Start for DKSTAS16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DKSTAS16 (16-bit Signed Saturating Straight Addition & Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKSTAS16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element saturating addition and 16-bit signed integer element saturating subtraction in a 32-bit
 * chunk simultaneously. Operands are from corresponding positions in 32-bit chunks.
 *
 * **Description**:\n
 * This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1 with the 16-bit signed integer
 * element in [31:16] of 32-bit chunks in Rs2; at the same time, it subtracts the 16-bit signed integer element in [15:0] of
 * 32-bit chunks in Rs2 from the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs1.
 * If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV
 * bit is set to 1. The saturated results are written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit chunks
 * in Rd for subtraction.
 *
 * **Operations**:\n
 * ~~~
 * res1 = Rs1.W[x][31:16] + Rs2.W[x][31:16];
 * res2 = Rs1.W[x][15:0] - Rs2.W[x][15:0];
 * for (res in [res1, res2]) {
 *   if (res > (2^15)-1) {
 *     res = (2^15)-1;
 *     OV = 1;
 *   } else if (res < -2^15) {
 *     res = -2^15;
 *     OV = 1;
 *   }
 * }
 * Rd.W[x][31:16] = res1;
 * Rd.W[x][15:0] = res2;
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKSTAS16(unsigned long long a, unsigned long long b)
{
    unsigned long long rd; /* destination operand (%0) of the instruction */

    /* Issue dkstas16 with a and b as the two source operands (%1, %2). */
    __ASM volatile("dkstas16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DKSTAS16 ===== */

/* ===== Inline Function Start for DSCLIP8 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DSCLIP8 (8-bit Signed Saturation and Clip)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSCLIP8 Rd, Rs1, imm3u[2:0]
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Limit the 8-bit signed integer elements of a register into a signed range simultaneously.
 *
 * **Description**:\n
 * This instruction limits the 8-bit signed integer elements stored in Rs1 into a signed integer range between -2^imm3u and
 * 2^imm3u-1, and writes the limited results to Rd. For example, if imm3u is 3, the 8-bit input values should be saturated
 * between 7 and -8. If saturation is performed, set OV bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * src = Rs1.B[x];
 * if (src > (2^imm3u)-1) {
 *   src = (2^imm3u)-1;
 *   OV = 1;
 * } else if (src < -2^imm3u) {
 *   src = -2^imm3u;
 *   OV = 1;
 * }
 * Rd.B[x] = src
 * x=7...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned immediate value (imm3u) that selects the clip range
 * \return value stored in unsigned long long type
 */
#define __RV_DSCLIP8(a, b)    \
    ({    \
        /* Evaluate the argument first and keep every temporary in the  */    \
        /* double-underscore namespace, so that an argument expression  */    \
        /* naming a caller variable called result is not captured by    */    \
        /* this macro's own (still uninitialized) temporary.            */    \
        unsigned long long __a = (unsigned long long)(a);    \
        unsigned long long __rv_result;    \
        /* b is passed through the K constraint, i.e. as an immediate. */    \
        __ASM volatile("dsclip8 %0, %1, %2" : "=r"(__rv_result) : "r"(__a), "K"(b));    \
        __rv_result;    \
    })
/* ===== Inline Function End for DSCLIP8 ===== */

/* ===== Inline Function Start for DSCLIP16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DSCLIP16 (16-bit Signed Saturation and Clip)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSCLIP16 Rd, Rs1, imm4u[3:0]
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Limit the 16-bit signed integer elements of a register into a signed range simultaneously.
 *
 * **Description**:\n
 * This instruction limits the 16-bit signed integer elements stored in Rs1 into a signed integer range between -2^imm4u and
 * 2^imm4u-1, and writes the limited results to Rd. For example, if imm4u is 3, the 16-bit input values should be saturated
 * between 7 and -8. If saturation is performed, set OV bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * src = Rs1.H[x];
 * if (src > (2^imm4u)-1) {
 *   src = (2^imm4u)-1;
 *   OV = 1;
 * } else if (src < -2^imm4u) {
 *   src = -2^imm4u;
 *   OV = 1;
 * }
 * Rd.H[x] = src
 * x=3...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned immediate value (imm4u) that selects the clip range
 * \return value stored in unsigned long long type
 */
#define __RV_DSCLIP16(a, b)    \
    ({    \
        /* Evaluate the argument first and keep every temporary in the  */    \
        /* double-underscore namespace, so that an argument expression  */    \
        /* naming a caller variable called result is not captured by    */    \
        /* this macro's own (still uninitialized) temporary.            */    \
        unsigned long long __a = (unsigned long long)(a);    \
        unsigned long long __rv_result;    \
        /* b is passed through the K constraint, i.e. as an immediate. */    \
        __ASM volatile("dsclip16 %0, %1, %2" : "=r"(__rv_result) : "r"(__a), "K"(b));    \
        __rv_result;    \
    })
/* ===== Inline Function End for DSCLIP16 ===== */

/* ===== Inline Function Start for DSCLIP32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DSCLIP32 (32-bit Signed Saturation and Clip)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSCLIP32 Rd, Rs1, imm5u[4:0]
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Limit the 32-bit signed integer elements of a register into a signed range simultaneously.
 *
 * **Description**:\n
 * This instruction limits the 32-bit signed integer elements stored in Rs1 into a signed integer range between -2^imm5u and
 * 2^imm5u-1, and writes the limited results to Rd. For example, if imm5u is 3, the 32-bit input values should be saturated
 * between 7 and -8. If saturation is performed, set OV bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * src = Rs1.W[x];
 * if (src > (2^imm5u)-1) {
 *   src = (2^imm5u)-1;
 *   OV = 1;
 * } else if (src < -2^imm5u) {
 *   src = -2^imm5u;
 *   OV = 1;
 * }
 * Rd.W[x] = src
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned immediate value (imm5u) that selects the clip range
 * \return value stored in unsigned long long type
 */
#define __RV_DSCLIP32(a, b)    \
    ({    \
        /* Evaluate the argument first and keep every temporary in the  */    \
        /* double-underscore namespace, so that an argument expression  */    \
        /* naming a caller variable called result is not captured by    */    \
        /* this macro's own (still uninitialized) temporary.            */    \
        unsigned long long __a = (unsigned long long)(a);    \
        unsigned long long __rv_result;    \
        /* b is passed through the K constraint, i.e. as an immediate. */    \
        __ASM volatile("dsclip32 %0, %1, %2" : "=r"(__rv_result) : "r"(__a), "K"(b));    \
        __rv_result;    \
    })
/* ===== Inline Function End for DSCLIP32 ===== */

/* ===== Inline Function Start for DRSUB32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DRSUB32 (32-bit Signed Halving Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DRSUB32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer element subtractions simultaneously. The results are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit signed integer elements in Rs2 from the 32-bit signed integer elements in Rs1. The
 * results are first arithmetically right-shifted by 1 bit and then written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = (Rs1.W[x] - Rs2.W[x]) s>> 1;
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DRSUB32(unsigned long long a, unsigned long long b)
{
    unsigned long long rd; /* destination operand (%0) of the instruction */

    /* Issue drsub32 with a and b as the two source operands (%1, %2). */
    __ASM volatile("drsub32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DRSUB32 ===== */

/* ===== Inline Function Start for DPACK32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DPACK32 (SIMD Pack Two 32-bit Data To 64-bit)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DPACK32 Rd, Rs1, Rs2
 * # Rd is even/odd pair of register
 * ~~~
 *
 * **Purpose**:\n
 * Pack two 32-bit values taken from two registers into one 64-bit value.
 *
 * **Description**:\n
 * This instruction moves 32-bit Rs1 to Rd.W[1] and moves 32-bit Rs2 to Rd.W[0].
 *
 * **Operations**:\n
 * ~~~
 * Rd = CONCAT(Rs1.W , Rs2.W);
 * ~~~
 *
 * \param [in]  a signed long type of value stored in a
 * \param [in]  b signed long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DPACK32(signed long a, signed long b)
{
    unsigned long long packed; /* destination operand (%0) of the instruction */

    /* Issue dpack32 with a and b as the two source operands (%1, %2). */
    __ASM volatile("dpack32 %0, %1, %2"
                   : "=r"(packed)
                   : "r"(a), "r"(b));
    return packed;
}
/* ===== Inline Function End for DPACK32 ===== */

/* ===== Inline Function Start for DSUNPKD810 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DSUNPKD810 (Signed Unpacking Bytes 1 & 0)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSUNPKD810 Rd, Rs1
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte 1 and byte 0 of 32-bit chunks in a register into two 16-bit signed halfwords of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `DSUNPKD810` instruction, it unpacks byte 1 and byte 0 of 32-bit chunks in Rs1 into two 16-bit signed halfwords
 * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = SE16(Rs1.W[m].B[1])
 * Rd.W[m].H[0] = SE16(Rs1.W[m].B[0])
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DSUNPKD810(unsigned long long a)
{
    unsigned long long unpacked; /* destination operand (%0) of the instruction */

    /* Issue dsunpkd810 with a as its single source operand (%1). */
    __ASM volatile("dsunpkd810 %0, %1"
                   : "=r"(unpacked)
                   : "r"(a));
    return unpacked;
}
/* ===== Inline Function End for DSUNPKD810 ===== */

/* ===== Inline Function Start for DSUNPKD820 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DSUNPKD820 (Signed Unpacking Bytes 2 & 0)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSUNPKD820 Rd, Rs1
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte 2 and byte 0 of 32-bit chunks in a register into two 16-bit signed halfwords of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `DSUNPKD820` instruction, it unpacks byte 2 and byte 0 of 32-bit chunks in Rs1 into two 16-bit signed halfwords
 * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = SE16(Rs1.W[m].B[2])
 * Rd.W[m].H[0] = SE16(Rs1.W[m].B[0])
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DSUNPKD820(unsigned long long a)
{
    unsigned long long unpacked; /* destination operand (%0) of the instruction */

    /* Issue dsunpkd820 with a as its single source operand (%1). */
    __ASM volatile("dsunpkd820 %0, %1"
                   : "=r"(unpacked)
                   : "r"(a));
    return unpacked;
}
/* ===== Inline Function End for DSUNPKD820 ===== */

/* ===== Inline Function Start for DSUNPKD830 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DSUNPKD830 (Signed Unpacking Bytes 3 & 0)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSUNPKD830 Rd, Rs1
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte 3 and byte 0 of 32-bit chunks in a register into two 16-bit signed halfwords of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `DSUNPKD830` instruction, it unpacks byte 3 and byte 0 of 32-bit chunks in Rs1 into two 16-bit signed halfwords
 * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = SE16(Rs1.W[m].B[3])
 * Rd.W[m].H[0] = SE16(Rs1.W[m].B[0])
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DSUNPKD830(unsigned long long a)
{
    unsigned long long unpacked; /* destination operand (%0) of the instruction */

    /* Issue dsunpkd830 with a as its single source operand (%1). */
    __ASM volatile("dsunpkd830 %0, %1"
                   : "=r"(unpacked)
                   : "r"(a));
    return unpacked;
}
/* ===== Inline Function End for DSUNPKD830 ===== */

/* ===== Inline Function Start for DSUNPKD831 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DSUNPKD831 (Signed Unpacking Bytes 3 & 1)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSUNPKD831 Rd, Rs1
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte 3 and byte 1 of 32-bit chunks in a register into two 16-bit signed halfwords of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `DSUNPKD831` instruction, it unpacks byte 3 and byte 1 of 32-bit chunks in Rs1 into two 16-bit signed halfwords
 * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = SE16(Rs1.W[m].B[3])
 * Rd.W[m].H[0] = SE16(Rs1.W[m].B[1])
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DSUNPKD831(unsigned long long a)
{
    unsigned long long unpacked; /* destination operand (%0) of the instruction */

    /* Issue dsunpkd831 with a as its single source operand (%1). */
    __ASM volatile("dsunpkd831 %0, %1"
                   : "=r"(unpacked)
                   : "r"(a));
    return unpacked;
}
/* ===== Inline Function End for DSUNPKD831 ===== */

/* ===== Inline Function Start for DSUNPKD832 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DSUNPKD832 (Signed Unpacking Bytes 3 & 2)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSUNPKD832 Rd, Rs1
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte 3 and byte 2 of 32-bit chunks in a register into two 16-bit signed halfwords of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `DSUNPKD832` instruction, it unpacks byte 3 and byte 2 of 32-bit chunks in Rs1 into two 16-bit signed halfwords
 * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = SE16(Rs1.W[m].B[3])
 * Rd.W[m].H[0] = SE16(Rs1.W[m].B[2])
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DSUNPKD832(unsigned long long a)
{
    unsigned long long unpacked; /* destination operand (%0) of the instruction */

    /* Issue dsunpkd832 with a as its single source operand (%1). */
    __ASM volatile("dsunpkd832 %0, %1"
                   : "=r"(unpacked)
                   : "r"(a));
    return unpacked;
}
/* ===== Inline Function End for DSUNPKD832 ===== */

/* ===== Inline Function Start for DZUNPKD810 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DZUNPKD810 (UnSigned Unpacking Bytes 1 & 0)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DZUNPKD810 Rd, Rs1
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte 1 and byte 0 of 32-bit chunks in a register into two 16-bit unsigned halfwords of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `DZUNPKD810` instruction, it unpacks byte 1 and byte 0 of 32-bit chunks in Rs1 into two 16-bit unsigned halfwords
 * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[1])
 * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[0])
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DZUNPKD810(unsigned long long a)
{
    unsigned long long unpacked; /* destination operand (%0) of the instruction */

    /* Issue dzunpkd810 with a as its single source operand (%1). */
    __ASM volatile("dzunpkd810 %0, %1"
                   : "=r"(unpacked)
                   : "r"(a));
    return unpacked;
}
/* ===== Inline Function End for DZUNPKD810 ===== */

/* ===== Inline Function Start for DZUNPKD820 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DZUNPKD820 (UnSigned Unpacking Bytes 2 & 0)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DZUNPKD820 Rd, Rs1
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte 2 and byte 0 of 32-bit chunks in a register into two 16-bit unsigned halfwords of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `DZUNPKD820` instruction, it unpacks byte 2 and byte 0 of 32-bit chunks in Rs1 into two 16-bit unsigned halfwords
 * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[2])
 * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[0])
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DZUNPKD820(unsigned long long a)
{
    unsigned long long unpacked; /* destination operand (%0) of the instruction */

    /* Issue dzunpkd820 with a as its single source operand (%1). */
    __ASM volatile("dzunpkd820 %0, %1"
                   : "=r"(unpacked)
                   : "r"(a));
    return unpacked;
}
/* ===== Inline Function End for DZUNPKD820 ===== */

/* ===== Inline Function Start for DZUNPKD830 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DZUNPKD830 (UnSigned Unpacking Bytes 3 & 0)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DZUNPKD830 Rd, Rs1
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte 3 and byte 0 of 32-bit chunks in a register into two 16-bit unsigned halfwords of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `DZUNPKD830` instruction, it unpacks byte 3 and byte 0 of 32-bit chunks in Rs1 into two 16-bit unsigned halfwords
 * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[3])
 * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[0])
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DZUNPKD830(unsigned long long a)
{
    unsigned long long unpacked; /* destination operand (%0) of the instruction */

    /* Issue dzunpkd830 with a as its single source operand (%1). */
    __ASM volatile("dzunpkd830 %0, %1"
                   : "=r"(unpacked)
                   : "r"(a));
    return unpacked;
}
/* ===== Inline Function End for DZUNPKD830 ===== */

/* ===== Inline Function Start for DZUNPKD831 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DZUNPKD831 (UnSigned Unpacking Bytes 3 & 1)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DZUNPKD831 Rd, Rs1
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte 3 and byte 1 of 32-bit chunks in a register into two 16-bit unsigned halfwords of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `DZUNPKD831` instruction, it unpacks byte 3 and byte 1 of 32-bit chunks in Rs1 into two 16-bit unsigned halfwords
 * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[3])
 * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[1])
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DZUNPKD831(unsigned long long a)
{
    unsigned long long unpacked; /* destination operand (%0) of the instruction */

    /* Issue dzunpkd831 with a as its single source operand (%1). */
    __ASM volatile("dzunpkd831 %0, %1"
                   : "=r"(unpacked)
                   : "r"(a));
    return unpacked;
}
/* ===== Inline Function End for DZUNPKD831 ===== */

/* ===== Inline Function Start for DZUNPKD832 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DZUNPKD832 (UnSigned Unpacking Bytes 3 & 2)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DZUNPKD832 Rd, Rs1
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte 3 and byte 2 of 32-bit chunks in a register into two 16-bit unsigned halfwords of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `DZUNPKD832` instruction, it unpacks byte 3 and byte 2 of 32-bit chunks in Rs1 into two 16-bit unsigned halfwords
 * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[3])
 * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[2])
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DZUNPKD832(unsigned long long a)
{
    unsigned long long unpacked; /* destination operand (%0) of the instruction */

    /* Issue dzunpkd832 with a as its single source operand (%1). */
    __ASM volatile("dzunpkd832 %0, %1"
                   : "=r"(unpacked)
                   : "r"(a));
    return unpacked;
}
/* ===== Inline Function End for DZUNPKD832 ===== */

/* ===== Inline Function Start for DKMMAC ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief   DKMMAC (64-bit MSW 32x32 Signed Multiply and Saturating Add)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKMMAC Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do MSW 32x32 element signed multiplications and saturating addition simultaneously. The results are written into Rd.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2
 * and adds the most significant 32-bit multiplication results with the signed 32-bit elements of Rd. If
 * the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range
 * and the OV bit is set to 1. The results after saturation are written to Rd. The .u form of the
 * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by
 * adding a 1 to bit 31 of the results.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
 * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
 *    res = sat.q31(dop + (aop s* bop)[63:32]);
 * }
 * Rd = concat(rest, resb);
 * x=0
 * ~~~
 *
 * \param [in]  t unsigned long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKMMAC(unsigned long long t, unsigned long long a, unsigned long long b)
{
    /* The accumulator is a read-write operand ("+r"): it enters as t and leaves as the result. */
    unsigned long long acc = t;

    __ASM volatile("dkmmac %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for DKMMAC ===== */

/* ===== Inline Function Start for DKMMAC.u ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief   DKMMAC.u (64-bit MSW 32x32 Signed Multiply and Saturating Add with Rounding)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKMMAC.u Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do MSW 32x32 element signed multiplications with rounding and saturating addition simultaneously. The results are written into Rd.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2
 * and adds the most significant 32-bit multiplication results with the signed 32-bit elements of Rd. If
 * the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range
 * and the OV bit is set to 1. The results after saturation are written to Rd. The .u form of the
 * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by
 * adding a 1 to bit 31 of the results.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
 * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
 *   res = sat.q31(dop + RUND(aop s* bop)[63:32]);
 * }
 * Rd = concat(rest, resb);
 * x=0
 * ~~~
 *
 * \param [in]  t unsigned long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKMMAC_U(unsigned long long t, unsigned long long a, unsigned long long b)
{
    /* The accumulator is a read-write operand ("+r"): it enters as t and leaves as the result. */
    unsigned long long acc = t;

    __ASM volatile("dkmmac.u %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for DKMMAC.u ===== */

/* ===== Inline Function Start for DKMMSB ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief   DKMMSB (64-bit MSW 32x32 Signed Multiply and Saturating Sub)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKMMSB Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do MSW 32x32 element signed multiplications and saturating subtraction simultaneously. The results are written into Rd.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2
 * and subtracts the most significant 32-bit multiplication results from the signed 32-bit elements of
 * Rd. If the subtraction result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the
 * range and the OV bit is set to 1. The results after saturation are written to Rd. The .u form of the
 * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by
 * adding a 1 to bit 31 of the results.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
 * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
 *    res = sat.q31(dop - (aop s* bop)[63:32]);
 * }
 * Rd = concat(rest, resb);
 * x=0
 * ~~~
 *
 * \param [in]  t unsigned long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKMMSB(unsigned long long t, unsigned long long a, unsigned long long b)
{
    /* The accumulator is a read-write operand ("+r"): it enters as t and leaves as the result. */
    unsigned long long acc = t;

    __ASM volatile("dkmmsb %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for DKMMSB ===== */

/* ===== Inline Function Start for DKMMSB.u ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief   DKMMSB.u (64-bit MSW 32x32 Signed Multiply and Saturating Sub with Rounding)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKMMSB.u Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do MSW 32x32 element signed multiplications with rounding and saturating subtraction simultaneously. The results are written into Rd.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2
 * and subtracts the most significant 32-bit multiplication results from the signed 32-bit elements of
 * Rd. If the subtraction result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the
 * range and the OV bit is set to 1. The results after saturation are written to Rd. The .u form of the
 * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by
 * adding a 1 to bit 31 of the results.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
 * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
 *    res = sat.q31(dop - RUND(aop s* bop)[63:32]);
 * }
 * Rd = concat(rest, resb);
 * x=0
 * ~~~
 *
 * \param [in]  t unsigned long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKMMSB_U(unsigned long long t, unsigned long long a, unsigned long long b)
{
    /* The accumulator is a read-write operand ("+r"): it enters as t and leaves as the result. */
    unsigned long long acc = t;

    __ASM volatile("dkmmsb.u %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for DKMMSB.u ===== */

/* ===== Inline Function Start for DKMADA ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DKMADA (Saturating Signed Multiply Two Halfs and Two Adds)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DKMADA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two 16x16 with 32-bit signed double addition simultaneously. The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
 * the bottom 16-bit content of 32-bit elements in Rs2 and then adds the result to the result of
 * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit
 * elements in Rs2.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
 *
 * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
 *   mul1 = aop.H[1] s* bop.H[1];
 *   mul2 = aop.H[0] s* bop.H[0];
 *   res = sat.q31(dop + mul1 + mul2);
 * }
 * Rd = concat(rest, resb);
 * x=0
 * ~~~
 *
 * \param [in]  t unsigned long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKMADA(unsigned long long t, unsigned long long a, unsigned long long b)
{
    /* t is consumed and overwritten by the instruction, hence the "+r" constraint. */
    unsigned long long acc = t;

    __ASM volatile("dkmada %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for DKMADA ===== */

/* ===== Inline Function Start for DKMAXDA ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DKMAXDA (Two Cross 16x16 with 32-bit Signed Double Add)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DKMAXDA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two cross 16x16 with 32-bit signed double addition simultaneously. The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the top 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-bit
 * elements in Rs2 and then adds the result to the result of multiplying the bottom 16-bit content of
 * 32-bit elements in Rs1 with the top 16-bit content of 32-bit elements in Rs2.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
 *
 * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
 *   mul1 = aop.H[1] s* bop.H[0];
 *   mul2 = aop.H[0] s* bop.H[1];
 *   res = sat.q31(dop + mul1 + mul2);
 * }
 * Rd = concat(rest, resb);
 * x=0
 * ~~~
 *
 * \param [in]  t unsigned long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKMAXDA(unsigned long long t, unsigned long long a, unsigned long long b)
{
    /* t is consumed and overwritten by the instruction, hence the "+r" constraint. */
    unsigned long long acc = t;

    __ASM volatile("dkmaxda %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for DKMAXDA ===== */

/* ===== Inline Function Start for DKMADS ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief  DKMADS (Two 16x16 with 32-bit Signed Add and Sub)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DKMADS Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two 16x16 with 32-bit signed addition and subtraction simultaneously. The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
 * the bottom 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
 * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit
 * elements in Rs2.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
 *
 * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
 *   mul1 = aop.H[1] s* bop.H[1];
 *   mul2 = aop.H[0] s* bop.H[0];
 *   res = sat.q31(dop + mul1 - mul2);
 * }
 * Rd = concat(rest, resb);
 * x=0
 * ~~~
 *
 * \param [in]  t unsigned long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKMADS(unsigned long long t, unsigned long long a, unsigned long long b)
{
    /* t is consumed and overwritten by the instruction, hence the "+r" constraint. */
    unsigned long long acc = t;

    __ASM volatile("dkmads %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for DKMADS ===== */

/* ===== Inline Function Start for DKMADRS ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief  DKMADRS (Two 16x16 with 32-bit Signed Add and Reversed Sub)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DKMADRS Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two 16x16 with 32-bit signed addition and reversed subtraction simultaneously. The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the top 16-bit content of 32-bit elements in Rs1 with the
 * top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
 * multiplying the bottom 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of
 * 32-bit elements in Rs2.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
 *
 * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
 *   mul1 = aop.H[1] s* bop.H[1];
 *   mul2 = aop.H[0] s* bop.H[0];
 *   res = sat.q31(dop - mul1 + mul2);
 * }
 * Rd = concat(rest, resb);
 * x=0
 * ~~~
 *
 * \param [in]  t unsigned long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKMADRS(unsigned long long t, unsigned long long a, unsigned long long b)
{
    /* t is consumed and overwritten by the instruction, hence the "+r" constraint. */
    unsigned long long acc = t;

    __ASM volatile("dkmadrs %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for DKMADRS ===== */

/* ===== Inline Function Start for DKMAXDS ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DKMAXDS (Saturating Signed Crossed Multiply Two Halfs & Subtract & Add)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DKMAXDS Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two cross 16x16 with 32-bit signed addition and subtraction simultaneously. The results are written into Rd.
 *
 * **Description**:\n
 * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then
 * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to
 * the corresponding 32-bit elements in a third register. The addition result may be saturated.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
 *
 * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
 *   mul1 = aop.H[1] s* bop.H[0];
 *   mul2 = aop.H[0] s* bop.H[1];
 *   res = sat.q31(dop + mul1 - mul2);
 * }
 * Rd = concat(rest, resb);
 * x=0
 * ~~~
 *
 * \param [in]  t unsigned long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKMAXDS(unsigned long long t, unsigned long long a, unsigned long long b)
{
    /* t is consumed and overwritten by the instruction, hence the "+r" constraint. */
    unsigned long long acc = t;

    __ASM volatile("dkmaxds %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for DKMAXDS ===== */

/* ===== Inline Function Start for DKMSDA ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DKMSDA (Two 16x16 with 32-bit Signed Double Sub)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DKMSDA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two 16x16 with 32-bit signed double subtraction simultaneously. The results are written into Rd.
 *
 * **Description**:\n
 * it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of
 * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
 *
 * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
 *   mul1 = aop.H[1] s* bop.H[1];
 *   mul2 = aop.H[0] s* bop.H[0];
 *   res = sat.q31(dop - mul1 - mul2);
 * }
 * Rd = concat(rest, resb);
 * x=0
 * ~~~
 *
 * \param [in]  t unsigned long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKMSDA(unsigned long long t, unsigned long long a, unsigned long long b)
{
    /* t is consumed and overwritten by the instruction, hence the "+r" constraint. */
    unsigned long long acc = t;

    __ASM volatile("dkmsda %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for DKMSDA ===== */

/* ===== Inline Function Start for DKMSXDA ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DKMSXDA (Two Cross 16x16 with 32-bit Signed Double Sub)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DKMSXDA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two cross 16x16 with 32-bit signed double subtraction simultaneously. The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the top 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of the
 * 32-bit elements of Rs1 with the bottom 16-bit content of the 32-bit elements of Rs2.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
 *
 * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
 *   mul1 = aop.H[1] s* bop.H[0];
 *   mul2 = aop.H[0] s* bop.H[1];
 *   res = sat.q31(dop - mul1 - mul2);
 * }
 * Rd = concat(rest, resb);
 * x=0
 * ~~~
 *
 * \param [in]  t unsigned long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKMSXDA(unsigned long long t, unsigned long long a, unsigned long long b)
{
    /* t is consumed and overwritten by the instruction, hence the "+r" constraint. */
    unsigned long long acc = t;

    __ASM volatile("dkmsxda %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for DKMSXDA ===== */

/* ===== Inline Function Start for DSMAQA ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DSMAQA (Four Signed 8x8 with 32-bit Signed Add)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DSMAQA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do four signed 8x8 with 32-bit signed addition simultaneously. The results are written into Rd.
 *
 * **Description**:\n
 * This instruction multiplies the four signed 8-bit elements of 32-bit chunks of Rs1 with the four
 * signed 8-bit elements of 32-bit chunks of Rs2 and then adds the four results together with the signed
 * content of the corresponding 32-bit chunks of Rd. The final results are written back to the
 * corresponding 32-bit chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
 *
 * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
 *   m0 = aop.B[0] s* bop.B[0];
 *   m1 = aop.B[1] s* bop.B[1];
 *   m2 = aop.B[2] s* bop.B[2];
 *   m3 = aop.B[3] s* bop.B[3];
 *   res = dop + m0 + m1 + m2 + m3;
 * }
 * Rd = concat(rest, resb);
 * x=0
 * ~~~
 *
 * \param [in]  t unsigned long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DSMAQA(unsigned long long t, unsigned long long a, unsigned long long b)
{
    /* The running sum is passed in as t and updated in place ("+r"). */
    unsigned long long sum = t;

    __ASM volatile("dsmaqa %0, %1, %2"
                   : "+r"(sum)
                   : "r"(a), "r"(b));
    return sum;
}
/* ===== Inline Function End for DSMAQA ===== */

/* ===== Inline Function Start for DSMAQA.SU ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DSMAQA.SU (Four Signed 8 x Unsigned 8 with 32-bit Signed Add)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DSMAQA.SU Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do four signed 8 x unsigned 8 with 32-bit signed addition simultaneously. The results are written into Rd.
 *
 * **Description**:\n
 * This instruction multiplies the four signed 8-bit elements of 32-bit chunks of Rs1 with the four
 * unsigned 8-bit elements of 32-bit chunks of Rs2 and then adds the four results together with the signed
 * content of the corresponding 32-bit chunks of Rd. The final results are written back to the
 * corresponding 32-bit chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
 *
 * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
 *   m0 = aop.B[0] su* bop.B[0];
 *   m1 = aop.B[1] su* bop.B[1];
 *   m2 = aop.B[2] su* bop.B[2];
 *   m3 = aop.B[3] su* bop.B[3];
 *   res = dop + m0 + m1 + m2 + m3;
 * }
 * Rd = concat(rest, resb);
 * x=0
 * ~~~
 *
 * \param [in]  t unsigned long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DSMAQA_SU(unsigned long long t, unsigned long long a, unsigned long long b)
{
    /* The running sum is passed in as t and updated in place ("+r"). */
    unsigned long long sum = t;

    __ASM volatile("dsmaqa.su %0, %1, %2"
                   : "+r"(sum)
                   : "r"(a), "r"(b));
    return sum;
}
/* ===== Inline Function End for DSMAQA.SU ===== */

/* ===== Inline Function Start for DUMAQA ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DUMAQA (Four Unsigned 8x8 with 32-bit Unsigned Add)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DUMAQA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do four unsigned 8x8 with 32-bit unsigned addition simultaneously. The results are written into Rd.
 *
 * **Description**:\n
 * This instruction multiplies the four unsigned 8-bit elements of 32-bit chunks of Rs1 with the four
 * unsigned 8-bit elements of 32-bit chunks of Rs2 and then adds the four results together with the
 * unsigned content of the corresponding 32-bit chunks of Rd. The final results are written back to the
 * corresponding 32-bit chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
 *
 * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
 *   m0 = aop.B[0] u* bop.B[0];
 *   m1 = aop.B[1] u* bop.B[1];
 *   m2 = aop.B[2] u* bop.B[2];
 *   m3 = aop.B[3] u* bop.B[3];
 *   res = dop + m0 + m1 + m2 + m3;
 * }
 * Rd = concat(rest, resb);
 * x=0
 * ~~~
 *
 * \param [in]  t unsigned long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DUMAQA(unsigned long long t, unsigned long long a, unsigned long long b)
{
    /* The running sum is passed in as t and updated in place ("+r"). */
    unsigned long long sum = t;

    __ASM volatile("dumaqa %0, %1, %2"
                   : "+r"(sum)
                   : "r"(a), "r"(b));
    return sum;
}
/* ===== Inline Function End for DUMAQA ===== */

/* ===== Inline Function Start for DKMDA32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DKMDA32 (Two Signed 32x32 with 64-bit Saturation Add)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DKMDA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 32x32 multiplications and add the signed multiplication results with Q63 saturation. The results are written into Rd.
 *
 * **Description**:\n
 * For the `DKMDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
 * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1
 * with the top 32-bit element of Rs2.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 * t0 = op1b s* op2b;
 * t1 = op1t s* op2t;
 * Rd = sat.q63(t0 + t1);
 * x=0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DKMDA32(unsigned long long a, unsigned long long b)
{
    long long sum;

    /* No accumulator input: the destination is write-only ("=r"). */
    __ASM volatile("dkmda32 %0, %1, %2"
                   : "=r"(sum)
                   : "r"(a), "r"(b));
    return sum;
}
/* ===== Inline Function End for DKMDA32 ===== */

/* ===== Inline Function Start for DKMXDA32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DKMXDA32 (Two Cross Signed 32x32 with 64-bit Saturation Add)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DKMXDA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two cross signed 32x32 and add the signed multiplication results with Q63 saturation. The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the bottom 32-bit element of Rs1 with the top 32-bit
 * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1
 * with the bottom 32-bit element of Rs2.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 * t01 = op1b s* op2t;
 * t10 = op1t s* op2b;
 * Rd = sat.q63(t01 + t10);
 * x=0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DKMXDA32(unsigned long long a, unsigned long long b)
{
    long long sum;

    /* No accumulator input: the destination is write-only ("=r"). */
    __ASM volatile("dkmxda32 %0, %1, %2"
                   : "=r"(sum)
                   : "r"(a), "r"(b));
    return sum;
}
/* ===== Inline Function End for DKMXDA32 ===== */

/* ===== Inline Function Start for DKMADA32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DKMADA32 (Two Signed 32x32 with 64-bit Saturation Add)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DKMADA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 32x32 and add the signed multiplication results and a third register with Q63 saturation. The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
 * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1
 * with the top 32-bit element of Rs2.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 * t01 = op1b s* op2b;
 * t10 = op1t s* op2t;
 * Rd = sat.q63(Rd + t01 + t10);
 * x=0
 * ~~~
 *
 * \param [in]  t long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DKMADA32(long long t, unsigned long long a, unsigned long long b)
{
    /* Signed 64-bit accumulator: read and written by the instruction ("+r"). */
    long long acc = t;

    __ASM volatile("dkmada32 %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for DKMADA32 ===== */

/* ===== Inline Function Start for DKMAXDA32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DKMAXDA32 (Two Cross Signed 32x32 with 64-bit Saturation Add)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DKMAXDA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two cross signed 32x32 and add the signed multiplication results and a third register with Q63 saturation. The
 * results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the top 32-bit element in Rs1 with the bottom 32-bit
 * element in Rs2 and then adds the result to the result of multiplying the bottom 32-bit element in Rs1
 * with the top 32-bit element in Rs2.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 * t01 = op1b s* op2t;
 * t10 = op1t s* op2b;
 * Rd = sat.q63(Rd + t01 + t10);
 * x=0
 * ~~~
 *
 * \param [in]  t long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DKMAXDA32(long long t, unsigned long long a, unsigned long long b)
{
    /* Signed 64-bit accumulator: read and written by the instruction ("+r"). */
    long long acc = t;

    __ASM volatile("dkmaxda32 %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for DKMAXDA32 ===== */

/* ===== Inline Function Start for DKMADS32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DKMADS32 (Two Signed 32x32 with 64-bit Saturation Add and Sub)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DKMADS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 32x32 and add the top signed multiplication results and subtraction bottom signed multiplication results
 * and add a third register with Q63 saturation. The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit
 * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in Rs1
 * with the top 32-bit element in Rs2.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 *
 * t0 = op1b s* op2b;
 * t1 = op1t s* op2t;
 * Rd = sat.q63(Rd - t0 + t1);
 * x=0
 * ~~~
 *
 * \param [in]  t long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DKMADS32(long long t, unsigned long long a, unsigned long long b)
{
    /* Signed 64-bit accumulator: read and written by the instruction ("+r"). */
    long long acc = t;

    __ASM volatile("dkmads32 %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for DKMADS32 ===== */

/* ===== Inline Function Start for DKMADRS32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DKMADRS32 (Two Signed 32x32 with 64-bit Saturation Reversed Add and Sub)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DKMADRS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 32x32 multiplications, subtract the top signed multiplication result, add the bottom signed
 * multiplication result, and add a third register with Q63 saturation. The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the top 32-bit element in Rs1 with the top 32-bit
 * element in Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit
 * element in Rs1 with the bottom 32-bit element in Rs2.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 * t0 = op1b s* op2b;
 * t1 = op1t s* op2t;
 * Rd = sat.q63(Rd + t0 - t1);
 * x=0
 * ~~~
 *
 * \param [in]  t long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DKMADRS32(long long t, unsigned long long a, unsigned long long b)
{
    /* Signed 64-bit accumulator: read and written by the instruction ("+r"). */
    long long acc = t;

    __ASM volatile("dkmadrs32 %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for DKMADRS32 ===== */

/* ===== Inline Function Start for DKMAXDS32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DKMAXDS32 (Two Cross Signed 32x32 with 64-bit Saturation Add and Sub)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DKMAXDS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 32x32 and add the top signed multiplication results and subtraction bottom signed multiplication results
 * and add a third register with Q63 saturation. The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the bottom 32-bit element in Rs1 with the top 32-bit
 * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in
 * Rs1 with the bottom 32-bit element in Rs2.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 *
 * t01 = op1b s* op2t;
 * t10 = op1t s* op2b;
 * Rd = sat.q63(Rd - t01 + t10);
 * x=0
 * ~~~
 *
 * \param [in]  t long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DKMAXDS32(long long t, unsigned long long a, unsigned long long b)
{
    /* "+r": t is read as the incoming Rd value and overwritten with the result. */
    __ASM volatile("dkmaxds32 %0, %1, %2"
                   : "+r"(t)            /* %0: Rd (in/out) */
                   : "r"(a), "r"(b));   /* %1: Rs1, %2: Rs2 */

    return t;
}
/* ===== Inline Function End for DKMAXDS32 ===== */

/* ===== Inline Function Start for DKMSDA32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DKMSDA32 (Two Signed 32x32 with 64-bit Saturation Sub)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DKMSDA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 32x32 and subtraction the top signed multiplication results and subtraction bottom signed multiplication
 * results and add a third register with Q63 saturation. The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
 * element of Rs2 and multiplies the top 32-bit element of Rs1 with the top 32-bit element of Rs2.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 *
 * t0 = op1b s* op2b;
 * t1 = op1t s* op2t;
 * Rd = sat.q63(Rd - t0 - t1);
 * x=0
 * ~~~
 *
 * \param [in]  t long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DKMSDA32(long long t, unsigned long long a, unsigned long long b)
{
    /* "+r": t is read as the incoming Rd value and overwritten with the result. */
    __ASM volatile("dkmsda32 %0, %1, %2"
                   : "+r"(t)            /* %0: Rd (in/out) */
                   : "r"(a), "r"(b));   /* %1: Rs1, %2: Rs2 */

    return t;
}
/* ===== Inline Function End for DKMSDA32 ===== */

/* ===== Inline Function Start for DKMSXDA32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DKMSXDA32 (Two Cross Signed 32x32 with 64-bit Saturation Sub)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DKMSXDA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two cross signed 32x32 and subtraction the top signed multiplication results and subtraction bottom signed multiplication
 * results and add a third register with Q63 saturation. The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the bottom 32-bit element of Rs1 with the top 32-bit
 * element of Rs2 and multiplies the top 32-bit element of Rs1 with the bottom 32-bit element of Rs2.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 *
 * t0 = op1b s* op2t;
 * t1 = op1t s* op2b;
 * Rd = sat.q63(Rd - t0 - t1);
 * x=0
 * ~~~
 *
 * \param [in]  t long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DKMSXDA32(long long t, unsigned long long a, unsigned long long b)
{
    /* "+r": t is read as the incoming Rd value and overwritten with the result. */
    __ASM volatile("dkmsxda32 %0, %1, %2"
                   : "+r"(t)            /* %0: Rd (in/out) */
                   : "r"(a), "r"(b));   /* %1: Rs1, %2: Rs2 */

    return t;
}
/* ===== Inline Function End for DKMSXDA32 ===== */

/* ===== Inline Function Start for DSMDS32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DSMDS32 (Two Signed 32x32 with 64-bit Sub)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DSMDS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 32x32 and add the top signed multiplication results and subtraction bottom signed multiplication. The
 * results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
 * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
 * Rs1 with the top 32-bit element of Rs2.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 *
 * t0 = op1b s* op2b;
 * t1 = op1t s* op2t;
 * Rd = t1 - t0;
 * x=0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMDS32(unsigned long long a, unsigned long long b)
{
    long long rd;   /* write-only destination, filled by the instruction */

    __ASM volatile("dsmds32 %0, %1, %2"
                   : "=r"(rd)           /* %0: Rd (out) */
                   : "r"(a), "r"(b));   /* %1: Rs1, %2: Rs2 */

    return rd;
}
/* ===== Inline Function End for DSMDS32 ===== */

/* ===== Inline Function Start for DSMDRS32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DSMDRS32 (Two Signed 32x32 with 64-bit Reversed Sub)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DSMDRS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 32x32 and subtraction the top signed multiplication results and add bottom signed multiplication. The results are written into Rd
 *
 * **Description**:\n
 * It multiplies the top 32-bit element of Rs1 with the top 32-bit
 * element of Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit
 * element of Rs1 with the bottom 32-bit element of Rs2.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 *
 * t0 = op1b s* op2b;
 * t1 = op1t s* op2t;
 * Rd = t0 - t1;
 * x=0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMDRS32(unsigned long long a, unsigned long long b)
{
    long long rd;   /* write-only destination, filled by the instruction */

    __ASM volatile("dsmdrs32 %0, %1, %2"
                   : "=r"(rd)           /* %0: Rd (out) */
                   : "r"(a), "r"(b));   /* %1: Rs1, %2: Rs2 */

    return rd;
}
/* ===== Inline Function End for DSMDRS32 ===== */

/* ===== Inline Function Start for DSMXDS32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DSMXDS32 (Two Cross Signed 32x32 with 64-bit Sub)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DSMXDS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two cross signed 32x32 and add the top signed multiplication results and subtraction bottom signed multiplication.
 * The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the bottom 32-bit element of Rs1 with the top 32-bit
 * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
 * Rs1 with the bottom 32-bit element of Rs2.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 *
 * t01 = op1b s* op2t;
 * t10 = op1t s* op2b;
 * Rd = t10 - t01;
 * x=0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMXDS32(unsigned long long a, unsigned long long b)
{
    long long rd;   /* write-only destination, filled by the instruction */

    __ASM volatile("dsmxds32 %0, %1, %2"
                   : "=r"(rd)           /* %0: Rd (out) */
                   : "r"(a), "r"(b));   /* %1: Rs1, %2: Rs2 */

    return rd;
}
/* ===== Inline Function End for DSMXDS32 ===== */

/* ===== Inline Function Start for DSMALDA ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DSMALDA (Four Signed 16x16 with 64-bit Add)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DSMALDA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do four signed 16x16 and add signed multiplication results and a third register. The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
 * content of Rs2 and then adds the result to the result of multiplying the top 16-bit content of Rs1 with
 * the top 16-bit content of Rs2 with unlimited precision
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 *
 * m0 = op1b.H[0] s* op2b.H[0];
 * m1 = op1b.H[1] s* op2b.H[1];
 * m2 = op1t.H[0] s* op2t.H[0];
 * m3 = op1t.H[1] s* op2t.H[1];
 *
 * Rd = Rd + m0 + m1 + m2 + m3;
 * x=0
 * ~~~
 *
 * \param [in]  t long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMALDA(long long t, unsigned long long a, unsigned long long b)
{
    /* "+r": t is read as the incoming Rd value and overwritten with the result. */
    __ASM volatile("dsmalda %0, %1, %2"
                   : "+r"(t)            /* %0: Rd (in/out) */
                   : "r"(a), "r"(b));   /* %1: Rs1, %2: Rs2 */

    return t;
}
/* ===== Inline Function End for DSMALDA ===== */

/* ===== Inline Function Start for DSMALXDA ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DSMALXDA (Four Cross Signed 16x16 with 64-bit Add)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DSMALXDA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do four cross signed 16x16 and add signed multiplication results and a third register. The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the top 16-bit content of Rs1 with the bottom 16-bit
 * content of Rs2 and then adds the result to the result of multiplying the bottom 16-bit content of Rs1
 * with the top 16-bit content of Rs2 with unlimited precision.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 *
 * m0 = op1b.H[0] s* op2b.H[1];
 * m1 = op1b.H[1] s* op2b.H[0];
 * m2 = op1t.H[0] s* op2t.H[1];
 * m3 = op1t.H[1] s* op2t.H[0];
 *
 * Rd = Rd + m0 + m1 + m2 + m3;
 * x=0
 * ~~~
 *
 * \param [in]  t long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMALXDA(long long t, unsigned long long a, unsigned long long b)
{
    /* "+r": t is read as the incoming Rd value and overwritten with the result. */
    __ASM volatile("dsmalxda %0, %1, %2"
                   : "+r"(t)            /* %0: Rd (in/out) */
                   : "r"(a), "r"(b));   /* %1: Rs1, %2: Rs2 */

    return t;
}
/* ===== Inline Function End for DSMALXDA ===== */

/* ===== Inline Function Start for DSMALDS ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DSMALDS (Four Signed 16x16 with 64-bit Add and Sub)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DSMALDS Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do four signed 16x16 and add and subtraction signed multiplication results and a third register. The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
 * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
 * Rs1 with the top 16-bit content of Rs2.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 *
 * m0 = op1b.H[1] s* op2b.H[1];
 * m1 = op1b.H[0] s* op2b.H[0];
 * m2 = op1t.H[1] s* op2t.H[1];
 * m3 = op1t.H[0] s* op2t.H[0];
 *
 * Rd = Rd + m0 - m1 + m2 - m3;
 * x=0
 * ~~~
 *
 * \param [in]  t long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMALDS(long long t, unsigned long long a, unsigned long long b)
{
    /* "+r": t is read as the incoming Rd value and overwritten with the result. */
    __ASM volatile("dsmalds %0, %1, %2"
                   : "+r"(t)            /* %0: Rd (in/out) */
                   : "r"(a), "r"(b));   /* %1: Rs1, %2: Rs2 */

    return t;
}
/* ===== Inline Function End for DSMALDS ===== */

/* ===== Inline Function Start for DSMALDRS ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DSMALDRS (Four Signed 16x16 with 64-bit Add and Reversed Sub)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DSMALDRS Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do four signed 16x16 multiplications, add and reversed-subtract the signed multiplication results, and add a third register. The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the top 16-bit content of Rs1 with the top 16-bit content
 * of Rs2 and then subtracts the result from the result of multiplying the bottom 16-bit content of Rs1
 * with the bottom 16-bit content of Rs2.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 *
 * m0 = op1b.H[0] s* op2b.H[0];
 * m1 = op1b.H[1] s* op2b.H[1];
 * m2 = op1t.H[0] s* op2t.H[0];
 * m3 = op1t.H[1] s* op2t.H[1];
 *
 * Rd = Rd + m0 - m1 + m2 - m3;
 * x=0
 * ~~~
 *
 * \param [in]  t long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMALDRS(long long t, unsigned long long a, unsigned long long b)
{
    /* "+r": t is read as the incoming Rd value and overwritten with the result. */
    __ASM volatile("dsmaldrs %0, %1, %2"
                   : "+r"(t)            /* %0: Rd (in/out) */
                   : "r"(a), "r"(b));   /* %1: Rs1, %2: Rs2 */

    return t;
}
/* ===== Inline Function End for DSMALDRS ===== */

/* ===== Inline Function Start for DSMALXDS ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief  DSMALXDS (Four Cross Signed 16x16 with 64-bit Add and Sub)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DSMALXDS Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do four cross signed 16x16 and add and subtraction signed multiplication results and a third register. The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the bottom 16-bit content of Rs1 with the top 16-bit
 * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
 * Rs1 with the bottom 16-bit content of Rs2.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 *
 * m0 = op1b.H[1] s* op2b.H[0];
 * m1 = op1b.H[0] s* op2b.H[1];
 * m2 = op1t.H[1] s* op2t.H[0];
 * m3 = op1t.H[0] s* op2t.H[1];
 *
 * Rd = Rd + m0 - m1 + m2 - m3;
 * x=0
 * ~~~
 *
 * \param [in]  t long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMALXDS(long long t, unsigned long long a, unsigned long long b)
{
    /* "+r": t is read as the incoming Rd value and overwritten with the result. */
    __ASM volatile("dsmalxds %0, %1, %2"
                   : "+r"(t)            /* %0: Rd (in/out) */
                   : "r"(a), "r"(b));   /* %1: Rs1, %2: Rs2 */

    return t;
}
/* ===== Inline Function End for DSMALXDS ===== */

/* ===== Inline Function Start for DSMSLDA ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief  DSMSLDA (Four Signed 16x16 with 64-bit Sub)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DSMSLDA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do four signed 16x16 and subtraction signed multiplication results and add a third register. The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
 * content Rs2 and multiplies the top 16-bit content of Rs1 with the top 16-bit content of Rs2.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 *
 * m0 = op1b.H[0] s* op2b.H[0];
 * m1 = op1b.H[1] s* op2b.H[1];
 * m2 = op1t.H[0] s* op2t.H[0];
 * m3 = op1t.H[1] s* op2t.H[1];
 *
 * Rd = Rd - m0 - m1 - m2 - m3;
 * x=0
 * ~~~
 *
 * \param [in]  t long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMSLDA(long long t, unsigned long long a, unsigned long long b)
{
    /* "+r": t is read as the incoming Rd value and overwritten with the result. */
    __ASM volatile("dsmslda %0, %1, %2"
                   : "+r"(t)            /* %0: Rd (in/out) */
                   : "r"(a), "r"(b));   /* %1: Rs1, %2: Rs2 */

    return t;
}
/* ===== Inline Function End for DSMSLDA ===== */

/* ===== Inline Function Start for DSMSLXDA ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief  DSMSLXDA (Four Cross Signed 16x16 with 64-bit Sub)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DSMSLXDA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do four cross signed 16x16 and subtraction signed multiplication results and add a third register. The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the top 16-bit content of Rs1 with the bottom 16-bit
 * content of Rs2 and multiplies the bottom 16-bit content of Rs1 with the top 16-bit content of Rs2.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 *
 * m0 = op1b.H[0] s* op2b.H[1];
 * m1 = op1b.H[1] s* op2b.H[0];
 * m2 = op1t.H[0] s* op2t.H[1];
 * m3 = op1t.H[1] s* op2t.H[0];
 *
 * Rd = Rd - m0 - m1 - m2 - m3;
 * x=0
 * ~~~
 *
 * \param [in]  t long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMSLXDA(long long t, unsigned long long a, unsigned long long b)
{
    /* "+r": t is read as the incoming Rd value and overwritten with the result. */
    __ASM volatile("dsmslxda %0, %1, %2"
                   : "+r"(t)            /* %0: Rd (in/out) */
                   : "r"(a), "r"(b));   /* %1: Rs1, %2: Rs2 */

    return t;
}
/* ===== Inline Function End for DSMSLXDA ===== */

/* ===== Inline Function Start for DDSMAQA ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief  DDSMAQA (Eight Signed 8x8 with 64-bit Add)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DDSMAQA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do eight signed 8x8 and add signed multiplication results and a third register. The results are written into Rd.
 *
 * **Description**:\n
 * Do eight signed 8-bit multiplications from eight 8-bit chunks of two registers; and then adds
 * the eight 16-bit results and the content of 64-bit chunks of a third register.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 *
 * m0 = op1b.B[0] s* op2b.B[0];
 * m1 = op1b.B[1] s* op2b.B[1];
 * m2 = op1b.B[2] s* op2b.B[2];
 * m3 = op1b.B[3] s* op2b.B[3];
 * m4 = op1t.B[0] s* op2t.B[0];
 * m5 = op1t.B[1] s* op2t.B[1];
 * m6 = op1t.B[2] s* op2t.B[2];
 * m7 = op1t.B[3] s* op2t.B[3];
 *
 * s0 = m0 + m1 + m2 + m3;
 * s1 = m4 + m5 + m6 + m7;
 * Rd = Rd + s0 + s1;
 * x=0
 * ~~~
 *
 * \param [in]  t long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DDSMAQA(long long t, unsigned long long a, unsigned long long b)
{
    /* "+r": t is read as the incoming Rd value and overwritten with the result. */
    __ASM volatile("ddsmaqa %0, %1, %2"
                   : "+r"(t)            /* %0: Rd (in/out) */
                   : "r"(a), "r"(b));   /* %1: Rs1, %2: Rs2 */

    return t;
}
/* ===== Inline Function End for DDSMAQA ===== */

/* ===== Inline Function Start for DDSMAQA.SU ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief  DDSMAQA.SU (Eight Signed 8 x Unsigned 8 with 64-bit Add)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DDSMAQA.SU Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do eight signed 8 x unsigned 8 and add signed multiplication results and a third register. The results are written into Rd.
 *
 * **Description**:\n
 * Do eight signed 8 x unsigned 8 and add signed multiplication results and a third register; and then adds
 * the eight 16-bit results and the content of 64-bit chunks of a third register.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 *
 * m0 = op1b.B[0] su* op2b.B[0];
 * m1 = op1b.B[1] su* op2b.B[1];
 * m2 = op1b.B[2] su* op2b.B[2];
 * m3 = op1b.B[3] su* op2b.B[3];
 * m4 = op1t.B[0] su* op2t.B[0];
 * m5 = op1t.B[1] su* op2t.B[1];
 * m6 = op1t.B[2] su* op2t.B[2];
 * m7 = op1t.B[3] su* op2t.B[3];
 *
 * s0 = m0 + m1 + m2 + m3;
 * s1 = m4 + m5 + m6 + m7;
 * Rd = Rd + s0 + s1;
 * x=0
 * ~~~
 *
 * \param [in]  t long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DDSMAQA_SU(long long t, unsigned long long a, unsigned long long b)
{
    /* "+r": t is read as the incoming Rd value and overwritten with the result. */
    __ASM volatile("ddsmaqa.su %0, %1, %2"
                   : "+r"(t)            /* %0: Rd (in/out) */
                   : "r"(a), "r"(b));   /* %1: Rs1, %2: Rs2 */

    return t;
}
/* ===== Inline Function End for DDSMAQA.SU ===== */

/* ===== Inline Function Start for DDUMAQA ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief  DDUMAQA (Eight Unsigned 8x8 with 64-bit Unsigned Add)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DDUMAQA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do eight unsigned 8x8 and add unsigned multiplication results and a third register. The results are written into Rd.
 *
 * **Description**:\n
 * Do eight unsigned 8x8 and add unsigned multiplication results and a third register; and then adds
 * the eight 16-bit results and the content of 64-bit chunks of a third register.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 *
 * m0 = op1b.B[0] u* op2b.B[0];
 * m1 = op1b.B[1] u* op2b.B[1];
 * m2 = op1b.B[2] u* op2b.B[2];
 * m3 = op1b.B[3] u* op2b.B[3];
 * m4 = op1t.B[0] u* op2t.B[0];
 * m5 = op1t.B[1] u* op2t.B[1];
 * m6 = op1t.B[2] u* op2t.B[2];
 * m7 = op1t.B[3] u* op2t.B[3];
 *
 * s0 = m0 + m1 + m2 + m3;
 * s1 = m4 + m5 + m6 + m7;
 * Rd = Rd + s0 + s1;
 * x=0
 * ~~~
 *
 * \param [in]  t long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DDUMAQA(long long t, unsigned long long a, unsigned long long b)
{
    /* "+r": t is read as the incoming Rd value and overwritten with the result. */
    __ASM volatile("ddumaqa %0, %1, %2"
                   : "+r"(t)            /* %0: Rd (in/out) */
                   : "r"(a), "r"(b));   /* %1: Rs1, %2: Rs2 */

    return t;
}
/* ===== Inline Function End for DDUMAQA ===== */

/* ===== Inline Function Start for DSMA32.u ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief    DSMA32.u (64-bit SIMD 32-bit Signed Multiply Addition With Rounding and Clip)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DSMA32.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 32x32 and add signed multiplication results with Rounding, then right shift 32-bit and clip q63 to q31.
 * The result is written to Rd.
 *
 * **Description**:\n
 * For the `DSMA32.u` instruction, multiply the top 32-bit Q31 content of 64-bit chunks in Rs1 with the top 32-bit Q31
 * content of 64-bit chunks in Rs2. At the same time, multiply the bottom 32-bit Q31 content of 64-bit chunks in Rs1 with
 * the bottom 32-bit Q31 content of 64-bit chunks in Rs2.
 * Then, do the addition for the results above and perform the additional rounding operations, and then move the data to the right
 * by 32-bit, and clip the 64-bit data into 32-bit. The result is written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd = (q31_t)((Rs1.W[x] s* Rs2.W[x] + Rs1.W[x + 1] s* Rs2.W[x + 1] + 0x80000000LL) s>> 32);
 * x=0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_DSMA32_U(unsigned long long a, unsigned long long b)
{
    long rd;   /* write-only destination, filled by the instruction */

    __ASM volatile("dsma32.u %0, %1, %2"
                   : "=r"(rd)           /* %0: Rd (out) */
                   : "r"(a), "r"(b));   /* %1: Rs1, %2: Rs2 */

    return rd;
}
/* ===== Inline Function End for DSMA32.u ===== */

/* ===== Inline Function Start for DSMXS32.u ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief    DSMXS32.u (64-bit SIMD 32-bit Signed Multiply Cross Subtraction With Rounding and Clip)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DSMXS32.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two cross signed 32x32 and sub signed multiplication results with Rounding, then right shift 32-bit and clip q63 to
 * q31. The result is written to Rd.
 *
 * **Description**:\n
 * For the `DSMXS32.u` instruction, multiply the top 32-bit Q31 content of 64-bit chunks in Rs1 with the bottom 32-bit
 * Q31 content of 64-bit chunks in Rs2. At the same time, multiply the bottom 32-bit Q31 content of 64-bit chunks in Rs1
 * with the top 32-bit Q31 content of 64-bit chunks in Rs2.
 * Then, do the subtraction for the results above and perform the additional rounding operations, and then move the data to the right by
 * 32-bit, and clip the 64-bit data into 32-bit. The result is written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd = (q31_t)((Rs1.W[x + 1] s* Rs2.W[x] - Rs1.W[x] s* Rs2.W[x + 1] + 0x80000000LL) s>> 32);
 * x=0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_DSMXS32_U(unsigned long long a, unsigned long long b)
{
    long rd;   /* write-only destination, filled by the instruction */

    __ASM volatile("dsmxs32.u %0, %1, %2"
                   : "=r"(rd)           /* %0: Rd (out) */
                   : "r"(a), "r"(b));   /* %1: Rs1, %2: Rs2 */

    return rd;
}
/* ===== Inline Function End for DSMXS32.u ===== */

/* ===== Inline Function Start for DSMXA32.u ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief    DSMXA32.u (64-bit SIMD 32-bit Signed Cross Multiply Addition with Rounding and Clip)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DSMXA32.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two cross signed 32x32 and add signed multiplication results with Rounding, then right shift 32-bit and clip q63 to
 * q31. The result is written to Rd.
 *
 * **Description**:\n
 * For the `DSMXA32.u` instruction,multiply the top 32-bit Q31 content of 64-bit chunks in Rs1 with the bottom 32-bit Q31
 * content of 64-bit chunks in Rs2. At the same time, multiply the bottom 32-bit Q31 content of 64-bit chunks in Rs1 with
 * the top 32-bit Q31 content of 64-bit chunks in Rs2.
 * Then, do the addition for the results above and perform the additional rounding operations, and then move the data to the right
 * by 32-bit, and clip the 64-bit data into 32-bit. The result is written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd = (q31_t)((Rs1.W[x + 1] s* Rs2.W[x] + Rs1.W[x] s* Rs2.W[x + 1] + 0x80000000LL) s>> 32);
 * x=0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_DSMXA32_U(unsigned long long a, unsigned long long b)
{
    long rd;   /* write-only destination, filled by the instruction */

    __ASM volatile("dsmxa32.u %0, %1, %2"
                   : "=r"(rd)           /* %0: Rd (out) */
                   : "r"(a), "r"(b));   /* %1: Rs1, %2: Rs2 */

    return rd;
}
/* ===== Inline Function End for DSMXA32.u ===== */

/* ===== Inline Function Start for DSMS32.u ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief    DSMS32.u (64-bit SIMD 32-bit Signed Multiply Subtraction with Rounding and Clip)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DSMS32.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 32x32 and sub signed multiplication results with Rounding, then right shift 32-bit and clip q63 to q31. The
 * result is written to Rd.
 *
 * **Description**:\n
 * For the `DSMS32.u` instruction, multiply the bottom 32-bit Q31 content of 64-bit chunks in Rs1 with the bottom 32-bit
 * Q31 content of 64-bit chunks in Rs2. At the same time, multiply the top 32-bit Q31 content of 64-bit chunks in Rs1 with
 * the top 32-bit Q31 content of 64-bit chunks in Rs2.
 * Then, do the subtraction for the results above and perform the additional rounding operations, and then move the data to the right by
 * 32-bit, and clip the 64-bit data into 32-bit. The result is written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd = (q31_t)((Rs1.W[x] s* Rs2.W[x] - Rs1.W[x + 1] s* Rs2.W[x + 1] + 0x80000000LL) s>> 32);
 * x=0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_DSMS32_U(unsigned long long a, unsigned long long b)
{
    long rd;   /* write-only destination, filled by the instruction */

    __ASM volatile("dsms32.u %0, %1, %2"
                   : "=r"(rd)           /* %0: Rd (out) */
                   : "r"(a), "r"(b));   /* %1: Rs1, %2: Rs2 */

    return rd;
}
/* ===== Inline Function End for DSMS32.u ===== */

/* ===== Inline Function Start for DSMADA16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief    DSMADA16 (Signed Multiply Two Halfs and Two Adds 32-bit)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSMADA16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications of two 32-bit registers; and then adds the 32-bit results and the 32-bit value of an
 * even/odd pair of registers together.
 * * DSMADA16: rt pair+ top*top + bottom*bottom
 *
 * **Description**:\n
 * This instruction multiplies the per 16-bit content of the 32-bit elements of Rs1 with the corresponding 16-bit content of
 * the 32-bit elements of Rs2. The result is added to the 32-bit value of an even/odd pair of registers specified by Rd(4,1).
 * The 32-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and Rs2, and the 32-bit value of the
 * register-pair are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
 * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
 * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
 * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
 * Rd.W = Rd.W + SE32(Mres0[0][31:0]) + SE32(Mres1[0][31:0]) + SE32(Mres0[1][31:0]) + SE32(Mres1[1][31:0]);
 * ~~~
 *
 * \param [in]  t long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_DSMADA16(long long t, unsigned long long a, unsigned long long b)
{
    /* "+r": t is read as the incoming Rd value and overwritten with the result. */
    __ASM volatile("dsmada16 %0, %1, %2"
                   : "+r"(t)            /* %0: Rd (in/out) */
                   : "r"(a), "r"(b));   /* %1: Rs1, %2: Rs2 */

    /* The accumulator is passed as long long but the result is returned as long. */
    return (long)t;
}
/* ===== Inline Function End for DSMADA16 ===== */

/* ===== Inline Function Start for DSMAXDA16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief    DSMAXDA16 (Signed Crossed Multiply Two Halfs and Two Adds 32-bit)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSMAXDA16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications of two 32-bit registers, and then add the 32-bit results and the 32-bit value of an
 * even/odd pair of registers together.
 * * DSMAXDA: rt pair + top*bottom + bottom*top (all 32-bit elements)
 *
 * **Description**:\n
 * This instruction crossly multiplies the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of the 32-bit
 * elements of Rs2 and then adds the result to the result of multiplying the bottom 16-bit content of the 32-bit elements of
 * Rs1 with the top 16-bit content of the 32-bit elements of Rs2 with unlimited precision. The result is added to the 64-bit
 * value of an even/odd pair of registers specified by Rd(4,1). The 64-bit addition result is clipped to a 32-bit result.
 *
 * **Operations**:\n
 * ~~~
 * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
 * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]);
 * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
 * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]);
 * Rd.W = Rd.W + SE32(Mres0[0][31:0]) + SE32(Mres1[0][31:0]) + SE32(Mres0[1][31:0]) + SE32(Mres1[1][31:0]);
 * ~~~
 *
 * \param [in]  t long long accumulator, passed as Rd (read and updated by the instruction)
 * \param [in]  a unsigned long long operand, passed as Rs1
 * \param [in]  b unsigned long long operand, passed as Rs2
 * \return updated value of t, narrowed to long type
 */
__STATIC_FORCEINLINE long __RV_DSMAXDA16(long long t, unsigned long long a, unsigned long long b)
{
    /* "+r": t is both the accumulator input and the destination (operand %0). */
    __ASM volatile("dsmaxda16 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
    return (long)t;
}
/* ===== Inline Function End for DSMAXDA16 ===== */

/* ===== Inline Function Start for DKSMS32.u ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief    DKSMS32.u (Two Signed Multiply Shift-clip and Saturation with Rounding)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKSMS32.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Computes saturated multiplication of two pairs of q31 type with shifted rounding.
 *
 * **Description**:\n
 * Compute the multiplication of Rs1 and Rs2 of type q31_t, extract [47:16] from the resulting 64-bit product
 * to get the 32-bit number, then add 1 to it to do rounding, and finally saturate the result after rounding.
 * As shown in the operations below, the rounded value is added to the corresponding word of Rd before saturation.
 *
 * **Operations**:\n
 * ~~~
 * Mres[x][63:0] = Rs1.W[x] s* Rs2.W[x];
 * Round[x][32:0] = Mres[x][47:15] + 1;
 * Rd.W[x] = sat.31(Rd.W[x] + Round[x][32:1]);
 * x=1...0
 * ~~~
 *
 * \param [in]  t unsigned long long accumulator, passed as Rd (read and updated by the instruction)
 * \param [in]  a unsigned long long operand, passed as Rs1
 * \param [in]  b unsigned long long operand, passed as Rs2
 * \return updated value of t in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKSMS32_U(unsigned long long t, unsigned long long a, unsigned long long b)
{
    /* "+r": t is both the accumulator input and the destination (operand %0). */
    __ASM volatile("dksms32.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for DKSMS32.u ===== */

/* ===== Inline Function Start for DMADA32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief    DMADA32 (Two Cross Signed 32x32 with 64-bit Add and Clip to 32-bit)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DMADA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two cross signed 32x32 multiplications and add the signed multiplication results to q63, then clip the q63 result to q31;
 * the final results are written into Rd.
 *
 * **Description**:\n
 * For the `DMADA32` instruction, it multiplies the top 32-bit element in Rs1 with the bottom 32-bit element in Rs2 and
 * then adds the result to the result of multiplying the bottom 32-bit element in Rs1 with the top 32-bit element in Rs2, then
 * clips the q63 result to q31.
 *
 * **Operations**:\n
 * ~~~
 * res = (q31_t)((((q63_t) Rd.w[0] << 32) + (q63_t)Rs1.w[0] s*  Rs2.w[1] + (q63_t)Rs1.w[1] s*  Rs2.w[0]) s>> 32);
 * rd = res;
 * ~~~
 *
 * \param [in]  t long long accumulator, passed as Rd (read and updated by the instruction)
 * \param [in]  a unsigned long long operand, passed as Rs1
 * \param [in]  b unsigned long long operand, passed as Rs2
 * \return updated value of t, narrowed to long type
 */
__STATIC_FORCEINLINE long __RV_DMADA32(long long t, unsigned long long a, unsigned long long b)
{
    /* "+r": t is both the accumulator input and the destination (operand %0). */
    __ASM volatile("dmada32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
    return (long)t;
}
/* ===== Inline Function End for DMADA32 ===== */

/* ===== Inline Function Start for DSMALBB ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief    DSMALBB (Signed Multiply Bottom Halfs & Add 64-bit)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSMALBB Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 16-bit content of the 32-bit elements of a register with the 16-bit content of the corresponding 32-bit
 * elements of another register and add the results with a 64-bit value of an even/odd pair of registers. The addition result
 * is written back to the register-pair.
 * * DSMALBB: rt pair + bottom*bottom (all 32-bit elements)
 *
 * **Description**:\n
 * For the `DSMALBB` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit content of Rs2. The
 * multiplication results are added with the 64-bit value of Rd. The 64-bit addition result is written back to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[0];
 * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[0];
 * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
 * ~~~
 *
 * \param [in]  t long long accumulator, passed as Rd (read and updated by the instruction)
 * \param [in]  a unsigned long long operand, passed as Rs1
 * \param [in]  b unsigned long long operand, passed as Rs2
 * \return updated value of t in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMALBB(long long t, unsigned long long a, unsigned long long b)
{
    /* "+r": t is both the accumulator input and the destination (operand %0). */
    __ASM volatile("dsmalbb %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for DSMALBB ===== */

/* ===== Inline Function Start for DSMALBT ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief    DSMALBT (Signed Multiply Bottom Half & Top Half & Add 64-bit)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSMALBT Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 16-bit content of the 32-bit elements of a register with the 16-bit content of the corresponding 32-bit
 * elements of another register and add the results with a 64-bit value of an even/odd pair of registers. The addition result
 * is written back to the register-pair.
 * * DSMALBT: rt pair + bottom*top (all 32-bit elements)
 *
 * **Description**:\n
 * For the `DSMALBT` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with the top 16-bit
 * content of the 32-bit elements of Rs2.
 * The multiplication results are added with the 64-bit value of Rd. The 64-bit addition result is written back to Rd. The
 * 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[1];
 * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[1];
 * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
 * ~~~
 *
 * \param [in]  t long long accumulator, passed as Rd (read and updated by the instruction)
 * \param [in]  a unsigned long long operand, passed as Rs1
 * \param [in]  b unsigned long long operand, passed as Rs2
 * \return updated value of t in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMALBT(long long t, unsigned long long a, unsigned long long b)
{
    /* "+r": t is both the accumulator input and the destination (operand %0). */
    __ASM volatile("dsmalbt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for DSMALBT ===== */

/* ===== Inline Function Start for DSMALTT ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief    DSMALTT (Signed Multiply Top Half & Add 64-bit)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSMALTT Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 16-bit content of the 32-bit elements of a register with the 16-bit content of the corresponding 32-bit
 * elements of another register and add the results with a 64-bit value of an even/odd pair of registers. The addition result
 * is written back to the register-pair.
 * * DSMALTT: rt pair + top*top (all 32-bit elements)
 *
 * **Description**:\n
 * For the `DSMALTT` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit
 * content of the 32-bit elements of Rs2.
 * The multiplication results are added with the 64-bit value of Rd. The 64-bit addition result is written back to Rd. The
 * 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * Mres[0][31:0] = Rs1.W[0].H[1] * Rs2.W[0].H[1];
 * Mres[1][31:0] = Rs1.W[1].H[1] * Rs2.W[1].H[1];
 * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
 * ~~~
 *
 * \param [in]  t long long accumulator, passed as Rd (read and updated by the instruction)
 * \param [in]  a unsigned long long operand, passed as Rs1
 * \param [in]  b unsigned long long operand, passed as Rs2
 * \return updated value of t in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMALTT(long long t, unsigned long long a, unsigned long long b)
{
    /* "+r": t is both the accumulator input and the destination (operand %0). */
    __ASM volatile("dsmaltt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for DSMALTT ===== */

/* ===== Inline Function Start for DKMABB32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief    DKMABB32 (Saturating Signed Multiply Bottom Words & Add)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKMABB32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit element in a register with the 32-bit element in another register and add the result to the content
 * of 64-bit data in the third register. The addition result may be saturated and is written to the third register.
 * * DKMABB32: rd + bottom*bottom
 *
 * **Description**:\n
 * For the `DKMABB32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit element in Rs2.
 * The multiplication result is added to the content of 64-bit data in Rd. If the addition result is beyond the Q63 number range
 * (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The result after saturation is written to Rd.
 * The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * res = Rd + (Rs1.W[0] * Rs2.W[0]);
 * if (res > (2^63)-1) {
 *   res = (2^63)-1;
 *   OV = 1;
 * } else if (res < -2^63) {
 *   res = -2^63;
 *   OV = 1;
 * }
 * Rd = res;
 * ~~~
 *
 * \param [in]  t long long accumulator, passed as Rd (read and updated by the instruction)
 * \param [in]  a unsigned long long operand, passed as Rs1
 * \param [in]  b unsigned long long operand, passed as Rs2
 * \return updated value of t in long long type
 */
__STATIC_FORCEINLINE long long __RV_DKMABB32(long long t, unsigned long long a, unsigned long long b)
{
    /* "+r": t is both the accumulator input and the destination (operand %0). */
    __ASM volatile("dkmabb32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for DKMABB32 ===== */

/* ===== Inline Function Start for DKMABT32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief    DKMABT32 (Saturating Signed Multiply Bottom & Top Words & Add)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKMABT32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit element in a register with the 32-bit element in another register and add the result to the content
 * of 64-bit data in the third register. The addition result may be saturated and is written to the third register.
 * * DKMABT32: rd + bottom*top
 *
 * **Description**:\n
 * For the `DKMABT32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit element in Rs2.
 * The multiplication result is added to the content of 64-bit data in Rd. If the addition result is beyond the Q63 number range
 * (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The result after saturation is written to Rd.
 * The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * res = Rd + (Rs1.W[0] * Rs2.W[1]);
 * if (res > (2^63)-1) {
 *   res = (2^63)-1;
 *   OV = 1;
 * } else if (res < -2^63) {
 *   res = -2^63;
 *   OV = 1;
 * }
 * Rd = res;
 * ~~~
 *
 * \param [in]  t long long accumulator, passed as Rd (read and updated by the instruction)
 * \param [in]  a unsigned long long operand, passed as Rs1
 * \param [in]  b unsigned long long operand, passed as Rs2
 * \return updated value of t in long long type
 */
__STATIC_FORCEINLINE long long __RV_DKMABT32(long long t, unsigned long long a, unsigned long long b)
{
    /* "+r": t is both the accumulator input and the destination (operand %0). */
    __ASM volatile("dkmabt32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for DKMABT32 ===== */

/* ===== Inline Function Start for DKMATT32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief    DKMATT32 (Saturating Signed Multiply Top Words & Add)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKMATT32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit element in a register with the 32-bit element in another register and add the result to the content
 * of 64-bit data in the third register. The addition result may be saturated and is written to the third register.
 * * DKMATT32: rd + top*top
 *
 * **Description**:\n
 * For the `DKMATT32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit element in Rs2.
 * The multiplication result is added to the content of 64-bit data in Rd. If the addition result is beyond the Q63 number range
 * (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The result after saturation is written to Rd.
 * The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * res = Rd + (Rs1.W[1] * Rs2.W[1]);
 * if (res > (2^63)-1) {
 *   res = (2^63)-1;
 *   OV = 1;
 * } else if (res < -2^63) {
 *   res = -2^63;
 *   OV = 1;
 * }
 * Rd = res;
 * ~~~
 *
 * \param [in]  t long long accumulator, passed as Rd (read and updated by the instruction)
 * \param [in]  a unsigned long long operand, passed as Rs1
 * \param [in]  b unsigned long long operand, passed as Rs2
 * \return updated value of t in long long type
 */
__STATIC_FORCEINLINE long long __RV_DKMATT32(long long t, unsigned long long a, unsigned long long b)
{
    /* "+r": t is both the accumulator input and the destination (operand %0). */
    __ASM volatile("dkmatt32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for DKMATT32 ===== */
#endif /* __RISCV_XLEN == 32 */

#elif defined (__ICCRISCV__)

#if __riscv_xlen == 32
#include "iar_nds32_intrinsic.h"
#elif __riscv_xlen == 64
#include "iar_nds64_intrinsic.h"
#else
#error "Unexpected RISC-V XLEN size."
#endif /* __riscv_xlen == 32 */

#pragma language=save
#pragma language=extended

// Map the __RV_* intrinsic names onto the compatible __nds__* intrinsics supplied by IAR
// (presumably declared in the iar_nds32/iar_nds64 intrinsic header included above).
// Note: __RV_SRAI8 and __RV_SRAI16 alias the same IAR intrinsics as __RV_SRA8 and __RV_SRA16.
#define __RV_CLROV              __nds__clrov
#define __RV_RDOV               __nds__rdov
#define __RV_ADD8               __nds__add8
#define __RV_SUB8               __nds__sub8
#define __RV_ADD16              __nds__add16
#define __RV_SUB16              __nds__sub16
#define __RV_ADD64              __nds__add64
#define __RV_SUB64              __nds__sub64
#define __RV_RADD8              __nds__radd8
#define __RV_RSUB8              __nds__rsub8
#define __RV_RADD16             __nds__radd16
#define __RV_RSUB16             __nds__rsub16
#define __RV_RADD64             __nds__radd64
#define __RV_RSUB64             __nds__rsub64
#define __RV_RADDW              __nds__raddw
#define __RV_RSUBW              __nds__rsubw
#define __RV_URADD8             __nds__uradd8
#define __RV_URSUB8             __nds__ursub8
#define __RV_URADD16            __nds__uradd16
#define __RV_URSUB16            __nds__ursub16
#define __RV_URADD64            __nds__uradd64
#define __RV_URSUB64            __nds__ursub64
#define __RV_URADDW             __nds__uraddw
#define __RV_URSUBW             __nds__ursubw
#define __RV_KADD8              __nds__kadd8
#define __RV_KSUB8              __nds__ksub8
#define __RV_KADD16             __nds__kadd16
#define __RV_KSUB16             __nds__ksub16
#define __RV_KADD64             __nds__kadd64
#define __RV_KSUB64             __nds__ksub64
#define __RV_KADDH              __nds__kaddh
#define __RV_KSUBH              __nds__ksubh
#define __RV_KADDW              __nds__kaddw
#define __RV_KSUBW              __nds__ksubw
#define __RV_UKADD8             __nds__ukadd8
#define __RV_UKSUB8             __nds__uksub8
#define __RV_UKADD16            __nds__ukadd16
#define __RV_UKSUB16            __nds__uksub16
#define __RV_UKADD64            __nds__ukadd64
#define __RV_UKSUB64            __nds__uksub64
#define __RV_UKADDH             __nds__ukaddh
#define __RV_UKSUBH             __nds__uksubh
#define __RV_UKADDW             __nds__ukaddw
#define __RV_UKSUBW             __nds__uksubw
#define __RV_CRAS16             __nds__cras16
#define __RV_CRSA16             __nds__crsa16
#define __RV_RCRAS16            __nds__rcras16
#define __RV_RCRSA16            __nds__rcrsa16
#define __RV_URCRAS16           __nds__urcras16
#define __RV_URCRSA16           __nds__urcrsa16
#define __RV_KCRAS16            __nds__kcras16
#define __RV_KCRSA16            __nds__kcrsa16
#define __RV_UKCRAS16           __nds__ukcras16
#define __RV_UKCRSA16           __nds__ukcrsa16
#define __RV_SRA8               __nds__sra8
#define __RV_SRAI8              __nds__sra8
#define __RV_SRA16              __nds__sra16
#define __RV_SRAI16             __nds__sra16
#define __RV_SRL8               __nds__srl8
#define __RV_SRL16              __nds__srl16
#define __RV_SLL8               __nds__sll8
#define __RV_SLL16              __nds__sll16
#define __RV_SRA_U              __nds__sra_u
#define __RV_SRA8_U             __nds__sra8_u
#define __RV_SRA16_U            __nds__sra16_u
#define __RV_SRL8_U             __nds__srl8_u
#define __RV_SRL16_U            __nds__srl16_u
#define __RV_KSLL8              __nds__ksll8
#define __RV_KSLL16             __nds__ksll16
#define __RV_KSLLW              __nds__ksllw
#define __RV_KSLRA8             __nds__kslra8
#define __RV_KSLRA8_U           __nds__kslra8_u
#define __RV_KSLRA16            __nds__kslra16
#define __RV_KSLRA16_U          __nds__kslra16_u
#define __RV_KSLRAW             __nds__kslraw
#define __RV_KSLRAW_U           __nds__kslraw_u
#define __RV_CMPEQ8             __nds__cmpeq8
#define __RV_CMPEQ16            __nds__cmpeq16
#define __RV_SCMPLE8            __nds__scmple8
#define __RV_SCMPLE16           __nds__scmple16
#define __RV_SCMPLT8            __nds__scmplt8
#define __RV_SCMPLT16           __nds__scmplt16
#define __RV_UCMPLE8            __nds__ucmple8
#define __RV_UCMPLE16           __nds__ucmple16
#define __RV_UCMPLT8            __nds__ucmplt8
#define __RV_UCMPLT16           __nds__ucmplt16
#define __RV_SMUL8              __nds__smul8
#define __RV_UMUL8              __nds__umul8
#define __RV_SMUL16             __nds__smul16
#define __RV_UMUL16             __nds__umul16
#define __RV_SMULX8             __nds__smulx8
#define __RV_UMULX8             __nds__umulx8
#define __RV_SMULX16            __nds__smulx16
#define __RV_UMULX16            __nds__umulx16
#define __RV_KHM8               __nds__khm8
#define __RV_KHMX8              __nds__khmx8
#define __RV_KHM16              __nds__khm16
#define __RV_KHMX16             __nds__khmx16
#define __RV_MULR64             __nds__mulr64
#define __RV_MULSR64            __nds__mulsr64
#define __RV_SMMUL              __nds__smmul
#define __RV_SMMUL_U            __nds__smmul_u
#define __RV_WEXT               __nds__wext
#define __RV_SUNPKD810          __nds__sunpkd810
#define __RV_SUNPKD820          __nds__sunpkd820
#define __RV_SUNPKD830          __nds__sunpkd830
#define __RV_SUNPKD831          __nds__sunpkd831
#define __RV_SUNPKD832          __nds__sunpkd832
#define __RV_ZUNPKD810          __nds__zunpkd810
#define __RV_ZUNPKD820          __nds__zunpkd820
#define __RV_ZUNPKD830          __nds__zunpkd830
#define __RV_ZUNPKD831          __nds__zunpkd831
#define __RV_ZUNPKD832          __nds__zunpkd832
#define __RV_PKBB16             __nds__pkbb16
#define __RV_PKBT16             __nds__pkbt16
#define __RV_PKTT16             __nds__pktt16
#define __RV_PKTB16             __nds__pktb16
#define __RV_KMMAC              __nds__kmmac
#define __RV_KMMAC_U            __nds__kmmac_u
#define __RV_KMMSB              __nds__kmmsb
#define __RV_KMMSB_U            __nds__kmmsb_u
#define __RV_KWMMUL             __nds__kwmmul
#define __RV_KWMMUL_U           __nds__kwmmul_u
#define __RV_SMMWB              __nds__smmwb
#define __RV_SMMWB_U            __nds__smmwb_u
#define __RV_SMMWT              __nds__smmwt
#define __RV_SMMWT_U            __nds__smmwt_u
#define __RV_KMMAWB             __nds__kmmawb
#define __RV_KMMAWB_U           __nds__kmmawb_u
#define __RV_KMMAWT             __nds__kmmawt
#define __RV_KMMAWT_U           __nds__kmmawt_u
#define __RV_KMMWB2             __nds__kmmwb2
#define __RV_KMMWB2_U           __nds__kmmwb2_u
#define __RV_KMMWT2             __nds__kmmwt2
#define __RV_KMMWT2_U           __nds__kmmwt2_u
#define __RV_KMMAWB2            __nds__kmmawb2
#define __RV_KMMAWB2_U          __nds__kmmawb2_u
#define __RV_KMMAWT2            __nds__kmmawt2
#define __RV_KMMAWT2_U          __nds__kmmawt2_u
#define __RV_SMBB16             __nds__smbb16
#define __RV_SMBT16             __nds__smbt16
#define __RV_SMTT16             __nds__smtt16
#define __RV_KMDA               __nds__kmda
#define __RV_KMXDA              __nds__kmxda
#define __RV_SMDS               __nds__smds
#define __RV_SMDRS              __nds__smdrs
#define __RV_SMXDS              __nds__smxds
#define __RV_KMABB              __nds__kmabb
#define __RV_KMABT              __nds__kmabt
#define __RV_KMATT              __nds__kmatt
#define __RV_KMADA              __nds__kmada
#define __RV_KMAXDA             __nds__kmaxda
#define __RV_KMADS              __nds__kmads
#define __RV_KMADRS             __nds__kmadrs
#define __RV_KMAXDS             __nds__kmaxds
#define __RV_KMSDA              __nds__kmsda
#define __RV_KMSXDA             __nds__kmsxda
#define __RV_SMAL               __nds__smal
#define __RV_SMAQA              __nds__smaqa
#define __RV_UMAQA              __nds__umaqa
#define __RV_SMAQA_SU           __nds__smaqa_su
#define __RV_SMAR64             __nds__smar64
#define __RV_SMSR64             __nds__smsr64
#define __RV_UMAR64             __nds__umar64
#define __RV_UMSR64             __nds__umsr64
#define __RV_KMAR64             __nds__kmar64
#define __RV_KMSR64             __nds__kmsr64
#define __RV_UKMAR64            __nds__ukmar64
#define __RV_UKMSR64            __nds__ukmsr64
#define __RV_SMALBB             __nds__smalbb
#define __RV_SMALBT             __nds__smalbt
#define __RV_SMALTT             __nds__smaltt
#define __RV_SMALDA             __nds__smalda
#define __RV_SMALXDA            __nds__smalxda
#define __RV_SMALDS             __nds__smalds
#define __RV_SMALDRS            __nds__smaldrs
#define __RV_SMALXDS            __nds__smalxds
#define __RV_SMSLDA             __nds__smslda
#define __RV_SMSLXDA            __nds__smslxda
#define __RV_MINW               __nds__minw
#define __RV_MAXW               __nds__maxw
#define __RV_SMIN8              __nds__smin8
#define __RV_SMAX8              __nds__smax8
#define __RV_SMIN16             __nds__smin16
#define __RV_SMAX16             __nds__smax16
#define __RV_UMIN8              __nds__umin8
#define __RV_UMAX8              __nds__umax8
#define __RV_UMIN16             __nds__umin16
#define __RV_UMAX16             __nds__umax16
#define __RV_KABS8              __nds__kabs8
#define __RV_KABS16             __nds__kabs16
#define __RV_KABSW              __nds__kabsw
#define __RV_SCLIP8             __nds__sclip8
#define __RV_SCLIP16            __nds__sclip16
#define __RV_SCLIP32            __nds__sclip32
#define __RV_UCLIP8             __nds__uclip8
#define __RV_UCLIP16            __nds__uclip16
#define __RV_UCLIP32            __nds__uclip32
#define __RV_CLO8               __nds__clo8
#define __RV_CLO16              __nds__clo16
#define __RV_CLO32              __nds__clo32
#define __RV_CLZ8               __nds__clz8
#define __RV_CLZ16              __nds__clz16
#define __RV_CLZ32              __nds__clz32
#define __RV_CLRS8              __nds__clrs8
#define __RV_CLRS16             __nds__clrs16
#define __RV_CLRS32             __nds__clrs32
#define __RV_SWAP8              __nds__swap8
#define __RV_SWAP16             __nds__swap16
#define __RV_KHMBB              __nds__khmbb
#define __RV_KHMBT              __nds__khmbt
#define __RV_KHMTT              __nds__khmtt
#define __RV_KDMBB              __nds__kdmbb
#define __RV_KDMBT              __nds__kdmbt
#define __RV_KDMTT              __nds__kdmtt
#define __RV_KDMABB             __nds__kdmabb
#define __RV_KDMABT             __nds__kdmabt
#define __RV_KDMATT             __nds__kdmatt
#define __RV_MADDR32            __nds__maddr32
#define __RV_MSUBR32            __nds__msubr32
#define __RV_PBSAD              __nds__pbsad
#define __RV_PBSADA             __nds__pbsada
#define __RV_AVE                __nds__ave
#define __RV_BITREV             __nds__bitrev
#define __RV_INSB               __nds__insb

// Additional __RV_* -> __nds__* aliases that are only defined when building for RV64.
// Note: __RV_SRAI32 and __RV_SLLI32 alias the same IAR intrinsics as __RV_SRA32 and __RV_SLL32.
#if (__riscv_xlen == 64)
#define __RV_ADD32              __nds__add32
#define __RV_SUB32              __nds__sub32
#define __RV_RADD32             __nds__radd32
#define __RV_RSUB32             __nds__rsub32
#define __RV_URADD32            __nds__uradd32
#define __RV_URSUB32            __nds__ursub32
#define __RV_KADD32             __nds__kadd32
#define __RV_KSUB32             __nds__ksub32
#define __RV_UKADD32            __nds__ukadd32
#define __RV_UKSUB32            __nds__uksub32
#define __RV_CRAS32             __nds__cras32
#define __RV_CRSA32             __nds__crsa32
#define __RV_RCRAS32            __nds__rcras32
#define __RV_RCRSA32            __nds__rcrsa32
#define __RV_URCRAS32           __nds__urcras32
#define __RV_URCRSA32           __nds__urcrsa32
#define __RV_KCRAS32            __nds__kcras32
#define __RV_KCRSA32            __nds__kcrsa32
#define __RV_UKCRAS32           __nds__ukcras32
#define __RV_UKCRSA32           __nds__ukcrsa32
#define __RV_SRA32              __nds__sra32
#define __RV_SRAI32             __nds__sra32
#define __RV_SRL32              __nds__srl32
#define __RV_SLL32              __nds__sll32
#define __RV_SLLI32             __nds__sll32
#define __RV_SRAW_U             __nds__sraw_u
#define __RV_SRA32_U            __nds__sra32_u
#define __RV_SRL32_U            __nds__srl32_u
#define __RV_KSLL32             __nds__ksll32
#define __RV_KSLRA32            __nds__kslra32
#define __RV_KSLRA32_U          __nds__kslra32_u
#define __RV_SMBB32             __nds__smbb32
#define __RV_SMBT32             __nds__smbt32
#define __RV_SMTT32             __nds__smtt32
#define __RV_PKBB32             __nds__pkbb32
#define __RV_PKBT32             __nds__pkbt32
#define __RV_PKTT32             __nds__pktt32
#define __RV_PKTB32             __nds__pktb32
#define __RV_SMIN32             __nds__smin32
#define __RV_SMAX32             __nds__smax32
#define __RV_UMIN32             __nds__umin32
#define __RV_UMAX32             __nds__umax32
#define __RV_KABS32             __nds__kabs32
#define __RV_KHMBB16            __nds__khmbb16
#define __RV_KHMBT16            __nds__khmbt16
#define __RV_KHMTT16            __nds__khmtt16
#define __RV_KDMBB16            __nds__kdmbb16
#define __RV_KDMBT16            __nds__kdmbt16
#define __RV_KDMTT16            __nds__kdmtt16
#define __RV_KDMABB16           __nds__kdmabb16
#define __RV_KDMABT16           __nds__kdmabt16
#define __RV_KDMATT16           __nds__kdmatt16
#define __RV_KMABB32            __nds__kmabb32
#define __RV_KMABT32            __nds__kmabt32
#define __RV_KMATT32            __nds__kmatt32
#define __RV_KMDA32             __nds__kmda32
#define __RV_KMXDA32            __nds__kmxda32
#define __RV_KMADA32            __nds__kmada32
#define __RV_KMAXDA32           __nds__kmaxda32
#define __RV_KMADS32            __nds__kmads32
#define __RV_KMADRS32           __nds__kmadrs32
#define __RV_KMAXDS32           __nds__kmaxds32
#define __RV_KMSDA32            __nds__kmsda32
#define __RV_KMSXDA32           __nds__kmsxda32
#define __RV_SMDS32             __nds__smds32
#define __RV_SMDRS32            __nds__smdrs32
#define __RV_SMXDS32            __nds__smxds32
#endif /* __riscv_xlen == 64 */

// For now, the P-extension version supported by the IAR IDE is 0.5.0, while Nuclei supports 0.5.4,
// so Nuclei supplies a workaround that adds the custom instructions not natively
// supported by the IAR assembler. Note that __RV_BPICK remains to be implemented in the future.
// Only the Nuclei custom Xxldsp instruction set is implemented here: bpick is not implemented,
// and expdxx is implemented in C rather than via a .insn variant.

// __RV_STAS16 for IAR: issued as a raw R-type `.insn`, apparently because the IAR assembler lacks the mnemonic.
// 0x7F / 0x2 / 0x7A are presumably opcode / funct3 / funct7 -- confirm against the instruction encoding.
// a and b are the source operands (%1, %2); the value written to %0 is returned.
#pragma inline=forced_no_body
unsigned long __RV_STAS16(unsigned long a, unsigned long b) {
    unsigned long r;
    __asm(".insn r 0x7F, 0x2, 0x7A,   %0,%1,%2":"=r"(r) : "r"(a), "r"(b) );
    return r;
}

// __RV_RSTAS16 for IAR: issued as a raw R-type `.insn`, apparently because the IAR assembler lacks the mnemonic.
// 0x7F / 0x2 / 0x5A are presumably opcode / funct3 / funct7 -- confirm against the instruction encoding.
// a and b are the source operands (%1, %2); the value written to %0 is returned.
#pragma inline=forced_no_body
unsigned long __RV_RSTAS16(unsigned long a, unsigned long b) {
    unsigned long r;
    __asm(".insn r 0x7F, 0x2, 0x5A,   %0,%1,%2":"=r"(r) : "r"(a), "r"(b) );
    return r;
}

// __RV_KSTAS16 for IAR: issued as a raw R-type `.insn`, apparently because the IAR assembler lacks the mnemonic.
// 0x7F / 0x2 / 0x62 are presumably opcode / funct3 / funct7 -- confirm against the instruction encoding.
// a and b are the source operands (%1, %2); the value written to %0 is returned.
#pragma inline=forced_no_body
unsigned long __RV_KSTAS16(unsigned long a, unsigned long b) {
    unsigned long r;
    __asm(".insn r 0x7F, 0x2, 0x62,   %0,%1,%2":"=r"(r) : "r"(a), "r"(b) );
    return r;
}

// __RV_URSTAS16 for IAR: issued as a raw R-type `.insn`, apparently because the IAR assembler lacks the mnemonic.
// 0x7F / 0x2 / 0x6A are presumably opcode / funct3 / funct7 -- confirm against the instruction encoding.
// a and b are the source operands (%1, %2); the value written to %0 is returned.
#pragma inline=forced_no_body
unsigned long __RV_URSTAS16(unsigned long a, unsigned long b) {
    unsigned long r;
    __asm(".insn r 0x7F, 0x2, 0x6A,   %0,%1,%2":"=r"(r) : "r"(a), "r"(b) );
    return r;
}

// __RV_UKSTAS16 for IAR: issued as a raw R-type `.insn`, apparently because the IAR assembler lacks the mnemonic.
// 0x7F / 0x2 / 0x72 are presumably opcode / funct3 / funct7 -- confirm against the instruction encoding.
// a and b are the source operands (%1, %2); the value written to %0 is returned.
#pragma inline=forced_no_body
unsigned long __RV_UKSTAS16(unsigned long a, unsigned long b) {
    unsigned long r;
    __asm(".insn r 0x7F, 0x2, 0x72,   %0,%1,%2":"=r"(r) : "r"(a), "r"(b) );
    return r;
}

// __RV_STSA16 for IAR: issued as a raw R-type `.insn`, apparently because the IAR assembler lacks the mnemonic.
// 0x7F / 0x2 / 0x7B are presumably opcode / funct3 / funct7 -- confirm against the instruction encoding.
// a and b are the source operands (%1, %2); the value written to %0 is returned.
#pragma inline=forced_no_body
unsigned long __RV_STSA16(unsigned long a, unsigned long b) {
    unsigned long r;
    __asm(".insn r 0x7F, 0x2, 0x7B,   %0,%1,%2":"=r"(r) : "r"(a), "r"(b) );
    return r;
}

// __RV_RSTSA16 for IAR: issued as a raw R-type `.insn`, apparently because the IAR assembler lacks the mnemonic.
// 0x7F / 0x2 / 0x5B are presumably opcode / funct3 / funct7 -- confirm against the instruction encoding.
// a and b are the source operands (%1, %2); the value written to %0 is returned.
#pragma inline=forced_no_body
unsigned long __RV_RSTSA16(unsigned long a, unsigned long b) {
    unsigned long r;
    __asm(".insn r 0x7F, 0x2, 0x5B,   %0,%1,%2":"=r"(r) : "r"(a), "r"(b) );
    return r;
}

// __RV_KSTSA16 for IAR: issued as a raw R-type `.insn`, apparently because the IAR assembler lacks the mnemonic.
// 0x7F / 0x2 / 0x63 are presumably opcode / funct3 / funct7 -- confirm against the instruction encoding.
// a and b are the source operands (%1, %2); the value written to %0 is returned.
#pragma inline=forced_no_body
unsigned long __RV_KSTSA16(unsigned long a, unsigned long b) {
    unsigned long r;
    __asm(".insn r 0x7F, 0x2, 0x63,   %0,%1,%2":"=r"(r) : "r"(a), "r"(b) );
    return r;
}

// __RV_URSTSA16 for IAR: issued as a raw R-type `.insn`, apparently because the IAR assembler lacks the mnemonic.
// 0x7F / 0x2 / 0x6B are presumably opcode / funct3 / funct7 -- confirm against the instruction encoding.
// a and b are the source operands (%1, %2); the value written to %0 is returned.
#pragma inline=forced_no_body
unsigned long __RV_URSTSA16(unsigned long a, unsigned long b) {
    unsigned long r;
    __asm(".insn r 0x7F, 0x2, 0x6B,   %0,%1,%2":"=r"(r) : "r"(a), "r"(b) );
    return r;
}

// __RV_UKSTSA16 for IAR: issued as a raw R-type `.insn`, apparently because the IAR assembler lacks the mnemonic.
// 0x7F / 0x2 / 0x73 are presumably opcode / funct3 / funct7 -- confirm against the instruction encoding.
// a and b are the source operands (%1, %2); the value written to %0 is returned.
#pragma inline=forced_no_body
unsigned long __RV_UKSTSA16(unsigned long a, unsigned long b) {
    unsigned long r;
    __asm(".insn r 0x7F, 0x2, 0x73,   %0,%1,%2":"=r"(r) : "r"(a), "r"(b) );
    return r;
}

// #pragma inline=forced_no_body
// unsigned long __RV_BPICK(unsigned long a, unsigned long b, unsigned long c) {
    // TODO: remains to be done
// }

// RV64 only
// NOTE(review): this function is not wrapped in an XLEN == 64 preprocessor check here -- confirm
// whether that is intentional.
// __RV_STAS32 for IAR: issued as a raw R-type `.insn`, apparently because the IAR assembler lacks the mnemonic.
// 0x7F / 0x2 / 0x78 are presumably opcode / funct3 / funct7 -- confirm against the instruction encoding.
// a and b are the source operands (%1, %2); the value written to %0 is returned.
#pragma inline=forced_no_body
unsigned long __RV_STAS32(unsigned long a, unsigned long b) {
    unsigned long r;
    __asm(".insn r 0x7F, 0x2, 0x78,   %0,%1,%2":"=r"(r) : "r"(a), "r"(b) );
    return r;
}

// __RV_RSTAS32 for IAR: issued as a raw R-type `.insn`, apparently because the IAR assembler lacks the mnemonic.
// 0x7F / 0x2 / 0x58 are presumably opcode / funct3 / funct7 -- confirm against the instruction encoding.
// a and b are the source operands (%1, %2); the value written to %0 is returned.
#pragma inline=forced_no_body
unsigned long __RV_RSTAS32(unsigned long a, unsigned long b) {
    unsigned long r;
    __asm(".insn r 0x7F, 0x2, 0x58,   %0,%1,%2":"=r"(r) : "r"(a), "r"(b) );
    return r;
}

// __RV_KSTAS32 for IAR: issued as a raw R-type `.insn`, apparently because the IAR assembler lacks the mnemonic.
// 0x7F / 0x2 / 0x60 are presumably opcode / funct3 / funct7 -- confirm against the instruction encoding.
// a and b are the source operands (%1, %2); the value written to %0 is returned.
#pragma inline=forced_no_body
unsigned long __RV_KSTAS32(unsigned long a, unsigned long b) {
    unsigned long r;
    __asm(".insn r 0x7F, 0x2, 0x60,   %0,%1,%2":"=r"(r) : "r"(a), "r"(b) );
    return r;
}

// __RV_URSTAS32 for IAR: issued as a raw R-type `.insn`, apparently because the IAR assembler lacks the mnemonic.
// 0x7F / 0x2 / 0x68 are presumably opcode / funct3 / funct7 -- confirm against the instruction encoding.
// a and b are the source operands (%1, %2); the value written to %0 is returned.
#pragma inline=forced_no_body
unsigned long __RV_URSTAS32(unsigned long a, unsigned long b) {
    unsigned long r;
    __asm(".insn r 0x7F, 0x2, 0x68,   %0,%1,%2":"=r"(r) : "r"(a), "r"(b) );
    return r;
}

// __RV_UKSTAS32 for IAR: issued as a raw R-type `.insn`, apparently because the IAR assembler lacks the mnemonic.
// 0x7F / 0x2 / 0x70 are presumably opcode / funct3 / funct7 -- confirm against the instruction encoding.
// a and b are the source operands (%1, %2); the value written to %0 is returned.
#pragma inline=forced_no_body
unsigned long __RV_UKSTAS32(unsigned long a, unsigned long b) {
    unsigned long r;
    __asm(".insn r 0x7F, 0x2, 0x70,   %0,%1,%2":"=r"(r) : "r"(a), "r"(b) );
    return r;
}

// __RV_STSA32 for IAR: issued as a raw R-type `.insn`, apparently because the IAR assembler lacks the mnemonic.
// 0x7F / 0x2 / 0x79 are presumably opcode / funct3 / funct7 -- confirm against the instruction encoding.
// a and b are the source operands (%1, %2); the value written to %0 is returned.
#pragma inline=forced_no_body
unsigned long __RV_STSA32(unsigned long a, unsigned long b) {
    unsigned long r;
    __asm(".insn r 0x7F, 0x2, 0x79,   %0,%1,%2":"=r"(r) : "r"(a), "r"(b) );
    return r;
}

// __RV_RSTSA32 for IAR: issued as a raw R-type `.insn`, apparently because the IAR assembler lacks the mnemonic.
// 0x7F / 0x2 / 0x59 are presumably opcode / funct3 / funct7 -- confirm against the instruction encoding.
// a and b are the source operands (%1, %2); the value written to %0 is returned.
#pragma inline=forced_no_body
unsigned long __RV_RSTSA32(unsigned long a, unsigned long b) {
    unsigned long r;
    __asm(".insn r 0x7F, 0x2, 0x59,   %0,%1,%2":"=r"(r) : "r"(a), "r"(b) );
    return r;
}

// __RV_KSTSA32 for IAR: issued as a raw R-type `.insn`, apparently because the IAR assembler lacks the mnemonic.
// 0x7F / 0x2 / 0x61 are presumably opcode / funct3 / funct7 -- confirm against the instruction encoding.
// a and b are the source operands (%1, %2); the value written to %0 is returned.
#pragma inline=forced_no_body
unsigned long __RV_KSTSA32(unsigned long a, unsigned long b) {
    unsigned long r;
    __asm(".insn r 0x7F, 0x2, 0x61,   %0,%1,%2":"=r"(r) : "r"(a), "r"(b) );
    return r;
}

// __RV_URSTSA32 for IAR: issued as a raw R-type `.insn`, apparently because the IAR assembler lacks the mnemonic.
// 0x7F / 0x2 / 0x69 are presumably opcode / funct3 / funct7 -- confirm against the instruction encoding.
// a and b are the source operands (%1, %2); the value written to %0 is returned.
#pragma inline=forced_no_body
unsigned long __RV_URSTSA32(unsigned long a, unsigned long b) {
    unsigned long r;
    __asm(".insn r 0x7F, 0x2, 0x69,   %0,%1,%2":"=r"(r) : "r"(a), "r"(b) );
    return r;
}

// __RV_UKSTSA32 for IAR: issued as a raw R-type `.insn`, apparently because the IAR assembler lacks the mnemonic.
// 0x7F / 0x2 / 0x71 are presumably opcode / funct3 / funct7 -- confirm against the instruction encoding.
// a and b are the source operands (%1, %2); the value written to %0 is returned.
#pragma inline=forced_no_body
unsigned long __RV_UKSTSA32(unsigned long a, unsigned long b) {
    unsigned long r;
    __asm(".insn r 0x7F, 0x2, 0x71,   %0,%1,%2":"=r"(r) : "r"(a), "r"(b) );
    return r;
}

/**
 * \brief Hand byte 0 (bits [7:0]) of \p a to __EXPD_BYTE and return its result.
 * \details __EXPD_BYTE is defined elsewhere in this file; presumably it replicates
 *          the byte across the whole word - confirm against its definition.
 * \param [in] a  source word
 * \return whatever __EXPD_BYTE produces for the selected byte
 */
#pragma inline=forced_no_body
unsigned long __RV_EXPD80(unsigned long a)
{
    const uint8_t byte0 = (uint8_t)(a & 0xff);

    return __EXPD_BYTE(byte0);
}

/**
 * \brief Hand byte 1 (bits [15:8]) of \p a to __EXPD_BYTE and return its result.
 * \details __EXPD_BYTE is defined elsewhere in this file; presumably it replicates
 *          the byte across the whole word - confirm against its definition.
 * \param [in] a  source word
 * \return whatever __EXPD_BYTE produces for the selected byte
 */
#pragma inline=forced_no_body
unsigned long __RV_EXPD81(unsigned long a)
{
    const uint8_t byte1 = (uint8_t)((a >> 8) & 0xff);

    return __EXPD_BYTE(byte1);
}

/**
 * \brief Hand byte 2 (bits [23:16]) of \p a to __EXPD_BYTE and return its result.
 * \details __EXPD_BYTE is defined elsewhere in this file; presumably it replicates
 *          the byte across the whole word - confirm against its definition.
 * \param [in] a  source word
 * \return whatever __EXPD_BYTE produces for the selected byte
 */
#pragma inline=forced_no_body
unsigned long __RV_EXPD82(unsigned long a)
{
    const uint8_t byte2 = (uint8_t)((a >> 16) & 0xff);

    return __EXPD_BYTE(byte2);
}

/**
 * \brief Hand byte 3 (bits [31:24]) of \p a to __EXPD_BYTE and return its result.
 * \details __EXPD_BYTE is defined elsewhere in this file; presumably it replicates
 *          the byte across the whole word - confirm against its definition.
 * \param [in] a  source word
 * \return whatever __EXPD_BYTE produces for the selected byte
 */
#pragma inline=forced_no_body
unsigned long __RV_EXPD83(unsigned long a)
{
    const uint8_t byte3 = (uint8_t)((a >> 24) & 0xff);

    return __EXPD_BYTE(byte3);
}

#if __RISCV_XLEN == 64
// RV64 only
/**
 * \brief Hand byte 4 (bits [39:32]) of \p a to __EXPD_BYTE and return its result.
 * \details Only compiled when __RISCV_XLEN == 64. __EXPD_BYTE is defined elsewhere in
 *          this file; presumably it replicates the byte across the whole word - confirm.
 * \param [in] a  source word
 * \return whatever __EXPD_BYTE produces for the selected byte
 */
#pragma inline=forced_no_body
unsigned long __RV_EXPD84(unsigned long a)
{
    const uint8_t byte4 = (uint8_t)((a >> 32) & 0xff);

    return __EXPD_BYTE(byte4);
}

/**
 * \brief Hand byte 5 (bits [47:40]) of \p a to __EXPD_BYTE and return its result.
 * \details Only compiled when __RISCV_XLEN == 64. __EXPD_BYTE is defined elsewhere in
 *          this file; presumably it replicates the byte across the whole word - confirm.
 * \param [in] a  source word
 * \return whatever __EXPD_BYTE produces for the selected byte
 */
#pragma inline=forced_no_body
unsigned long __RV_EXPD85(unsigned long a)
{
    const uint8_t byte5 = (uint8_t)((a >> 40) & 0xff);

    return __EXPD_BYTE(byte5);
}

/**
 * \brief Hand byte 6 (bits [55:48]) of \p a to __EXPD_BYTE and return its result.
 * \details Only compiled when __RISCV_XLEN == 64. __EXPD_BYTE is defined elsewhere in
 *          this file; presumably it replicates the byte across the whole word - confirm.
 * \param [in] a  source word
 * \return whatever __EXPD_BYTE produces for the selected byte
 */
#pragma inline=forced_no_body
unsigned long __RV_EXPD86(unsigned long a)
{
    const uint8_t byte6 = (uint8_t)((a >> 48) & 0xff);

    return __EXPD_BYTE(byte6);
}

/**
 * \brief Hand byte 7 (bits [63:56]) of \p a to __EXPD_BYTE and return its result.
 * \details Only compiled when __RISCV_XLEN == 64. __EXPD_BYTE is defined elsewhere in
 *          this file; presumably it replicates the byte across the whole word - confirm.
 * \param [in] a  source word
 * \return whatever __EXPD_BYTE produces for the selected byte
 */
#pragma inline=forced_no_body
unsigned long __RV_EXPD87(unsigned long a)
{
    const uint8_t byte7 = (uint8_t)((a >> 56) & 0xff);

    return __EXPD_BYTE(byte7);
}
#endif
#pragma language=restore

#else
    #error Unknown compiler
#endif /* __ICCRISCV__ */


/* XXXXX ARM Compatible SIMD API XXXXX */
/*
 * ARM-style SIMD names mapped one-to-one onto the __RV_* intrinsics defined
 * earlier in this file. Each macro forwards its two arguments to the intrinsic
 * named on the right-hand side.
 */
/** \brief Q setting quad 8-bit saturating addition (forwards to __RV_KADD8). */
#define __QADD8(x, y)               __RV_KADD8(x, y)
/** \brief Q setting quad 8-bit saturating subtract (forwards to __RV_KSUB8). */
#define __QSUB8(x, y)               __RV_KSUB8((x), (y))
/** \brief Q setting dual 16-bit saturating addition (forwards to __RV_KADD16). */
#define __QADD16(x, y)              __RV_KADD16((x), (y))
/** \brief Dual 16-bit signed addition with halved results (forwards to __RV_RADD16). */
#define __SHADD16(x, y)             __RV_RADD16((x), (y))
/** \brief Q setting dual 16-bit saturating subtract (forwards to __RV_KSUB16). */
#define __QSUB16(x, y)              __RV_KSUB16((x), (y))
/** \brief Dual 16-bit signed subtraction with halved results (forwards to __RV_RSUB16). */
#define __SHSUB16(x, y)             __RV_RSUB16((x), (y))
/** \brief Q setting dual 16-bit add and subtract with exchange (forwards to __RV_KCRAS16). */
#define __QASX(x, y)                __RV_KCRAS16((x), (y))
/** \brief Dual 16-bit signed addition and subtraction with halved results (forwards to __RV_RCRAS16). */
#define __SHASX(x, y)               __RV_RCRAS16((x), (y))
/** \brief Q setting dual 16-bit subtract and add with exchange (forwards to __RV_KCRSA16). */
#define __QSAX(x, y)                __RV_KCRSA16((x), (y))
/** \brief Dual 16-bit signed subtraction and addition with halved results (forwards to __RV_RCRSA16). */
#define __SHSAX(x, y)               __RV_RCRSA16((x), (y))
/** \brief Dual 16-bit signed multiply with exchange returning difference.
 *  Note the operands reach __RV_SMXDS in swapped order (y first, then x);
 *  presumably this flips the sign of the difference to match the ARM definition - confirm. */
#define __SMUSDX(x, y)              __RV_SMXDS((y), (x))
/**
 * \brief Q setting sum of dual 16-bit signed multiply with exchange (ARM __SMUADX name).
 * \details Thin wrapper: the result is whatever __RV_KMXDA(op1, op2) returns.
 * \param [in] op1  first packed operand
 * \param [in] op2  second packed operand
 * \return result of __RV_KMXDA converted to long
 */
__STATIC_FORCEINLINE long __SMUADX (unsigned long op1, unsigned long op2)
{
    const long crossed_sum = __RV_KMXDA(op1, op2);

    return crossed_sum;
}
/** \brief Q setting saturating add (forwards both arguments to __RV_KADDW). */
#define __QADD(x, y)                __RV_KADDW((x), (y))
/** \brief Q setting saturating subtract (forwards both arguments to __RV_KSUBW). */
#define __QSUB(x, y)                __RV_KSUBW((x), (y))
/**
 * \brief Q setting dual 16-bit signed multiply with single 32-bit accumulator (ARM __SMLAD name).
 * \details Thin wrapper around __RV_KMADA; note the accumulator moves to the first
 *          argument position of the underlying intrinsic.
 * \param [in] op1  first packed operand
 * \param [in] op2  second packed operand
 * \param [in] acc  accumulator value
 * \return result of __RV_KMADA(acc, op1, op2) converted to long
 */
__STATIC_FORCEINLINE long __SMLAD(unsigned long op1, unsigned long op2, long acc)
{
    const long accumulated = __RV_KMADA(acc, op1, op2);

    return accumulated;
}
/**
 * \brief Q setting pre-exchanged dual 16-bit signed multiply with single 32-bit accumulator (ARM __SMLADX name).
 * \details Thin wrapper around __RV_KMAXDA; the accumulator is passed as the first
 *          argument of the underlying intrinsic.
 * \param [in] op1  first packed operand
 * \param [in] op2  second packed operand
 * \param [in] acc  accumulator value
 * \return result of __RV_KMAXDA(acc, op1, op2) converted to long
 */
__STATIC_FORCEINLINE long __SMLADX(unsigned long op1, unsigned long op2, long acc)
{
    const long accumulated = __RV_KMAXDA(acc, op1, op2);

    return accumulated;
}
/**
 * \brief Dual 16-bit signed multiply with exchange, subtract, then accumulate (ARM __SMLSDX name).
 * \details Returns acc - __RV_SMXDS(op1, op2). The subtraction is carried out on
 *          unsigned long so that an out-of-range result wraps around instead of
 *          being signed-overflow undefined behaviour. No saturation and no
 *          overflow flag handling is performed by this wrapper.
 * \param [in] op1  first packed operand, forwarded to __RV_SMXDS
 * \param [in] op2  second packed operand, forwarded to __RV_SMXDS
 * \param [in] acc  accumulator the __RV_SMXDS result is subtracted from
 * \return wrapped difference acc - __RV_SMXDS(op1, op2)
 */
__STATIC_FORCEINLINE long __SMLSDX(unsigned long op1, unsigned long op2, long acc)
{
    const unsigned long crossed_diff = (unsigned long)__RV_SMXDS(op1, op2);

    /* Unsigned arithmetic wraps modulo 2^XLEN; the signed form would be UB on overflow. */
    return (long)((unsigned long)acc - crossed_diff);
}
/**
 * \brief Dual 16-bit signed multiply with single 64-bit accumulator (ARM __SMLALD name).
 * \details Thin wrapper around __RV_SMALDA; the accumulator is passed as the first
 *          argument of the underlying intrinsic.
 * \param [in] op1  first packed operand
 * \param [in] op2  second packed operand
 * \param [in] acc  64-bit accumulator value
 * \return result of __RV_SMALDA(acc, op1, op2) converted to long long
 */
__STATIC_FORCEINLINE long long __SMLALD(unsigned long op1, unsigned long op2, long long acc)
{
    const long long accumulated = __RV_SMALDA(acc, op1, op2);

    return accumulated;
}
/**
 * \brief Dual 16-bit signed multiply with exchange with single 64-bit accumulator (ARM __SMLALDX name).
 * \details Thin wrapper around __RV_SMALXDA; the accumulator is passed as the first
 *          argument of the underlying intrinsic.
 * \param [in] op1  first packed operand
 * \param [in] op2  second packed operand
 * \param [in] acc  64-bit accumulator value
 * \return result of __RV_SMALXDA(acc, op1, op2) converted to long long
 */
__STATIC_FORCEINLINE long long __SMLALDX(unsigned long op1, unsigned long op2, long long acc)
{
    const long long accumulated = __RV_SMALXDA(acc, op1, op2);

    return accumulated;
}
/**
 * \brief Q setting sum of dual 16-bit signed multiply (ARM __SMUAD name).
 * \details Thin wrapper: the result is whatever __RV_KMDA(op1, op2) returns.
 * \param [in] op1  first packed operand
 * \param [in] op2  second packed operand
 * \return result of __RV_KMDA converted to long
 */
__STATIC_FORCEINLINE long __SMUAD(unsigned long op1, unsigned long op2)
{
    const long product_sum = __RV_KMDA(op1, op2);

    return product_sum;
}
/**
 * \brief Dual 16-bit signed multiply returning difference (ARM __SMUSD name).
 * \details Thin wrapper: the result is whatever __RV_SMDRS(op1, op2) returns.
 * \param [in] op1  first packed operand
 * \param [in] op2  second packed operand
 * \return result of __RV_SMDRS converted to long
 */
__STATIC_FORCEINLINE long __SMUSD(unsigned long op1, unsigned long op2)
{
    const long product_diff = __RV_SMDRS(op1, op2);

    return product_diff;
}
/** \brief Dual extract 8-bits and sign extend each to 16-bits (forwards \p x to __RV_SUNPKD820). */
#define __SXTB16(x)             __RV_SUNPKD820(x)
/**
 * \brief Dual extracted 8-bit to 16-bit signed addition (ARM __SXTAB16 name). TODO Need test
 * \details \p op2 is first run through __RV_SUNPKD820, then the result is combined
 *          with \p op1 via __RV_ADD16.
 * \param [in] op1  packed 16-bit addend
 * \param [in] op2  packed source whose bytes are unpacked before the addition
 * \return result of __RV_ADD16(op1, __RV_SUNPKD820(op2))
 */
__STATIC_FORCEINLINE unsigned long __SXTAB16(unsigned long op1, unsigned long op2)
{
    const unsigned long widened = __RV_SUNPKD820(op2);

    return __RV_ADD16(op1, widened);
}
/** \brief __SXTAB16 with its second operand first passed through __ROR(ARG2, ROTATE); __ROR is defined elsewhere. */
#define __SXTAB16_RORn(ARG1, ARG2, ROTATE)        __SXTAB16(ARG1, __ROR(ARG2, ROTATE))

/**
 * \brief 32-bit signed multiply with 32-bit truncated accumulator (ARM __SMMLA name).
 * \details Returns acc + __RV_SMMUL(op1, op2). The addition is carried out on
 *          unsigned long so that an out-of-range sum wraps around instead of being
 *          signed-overflow undefined behaviour. No saturation is applied.
 * \param [in] op1  first multiplicand, forwarded to __RV_SMMUL
 * \param [in] op2  second multiplicand, forwarded to __RV_SMMUL
 * \param [in] acc  accumulator added to the __RV_SMMUL result
 * \return wrapped sum acc + __RV_SMMUL(op1, op2)
 */
__STATIC_FORCEINLINE long __SMMLA(long op1, long op2, long acc)
{
    const unsigned long mul = (unsigned long)__RV_SMMUL(op1, op2);

    /* Unsigned arithmetic wraps modulo 2^XLEN; the signed form would be UB on overflow. */
    return (long)((unsigned long)acc + mul);
}
/*
 * Plain name aliases: every name on the left expands directly to the intrinsic
 * on the right, so arguments are forwarded untouched.
 * NOTE(review): __SSUB8/__SADD8 resolve to __RV_KSUB8/__RV_KADD8, the same
 * intrinsics this file uses for __QSUB8/__QADD8 (documented there as
 * saturating). ARM's __SADD8/__SSUB8 are not saturating, so callers relying on
 * wrap-around may see different results - confirm this mapping is intentional.
 */
#define __DKHM8                 __RV_DKHM8
#define __DKHM16                __RV_DKHM16
#define __DKSUB16               __RV_DKSUB16
#define __SMAQA                 __RV_SMAQA
#define __MULSR64               __RV_MULSR64
#define __DQADD8                __RV_DKADD8
#define __DQSUB8                __RV_DKSUB8
#define __DKADD16               __RV_DKADD16
#define __PKBB16                __RV_PKBB16
#define __DKSLRA16              __RV_DKSLRA16
#define __DKSLRA8               __RV_DKSLRA8
#define __KABSW                 __RV_KABSW
#define __DKABS8                __RV_DKABS8
#define __DKABS16               __RV_DKABS16
#define __SMALDA                __RV_SMALDA
#define __SMSLDA                __RV_SMSLDA
#define __SMALBB                __RV_SMALBB
#define __SUB64                 __RV_SUB64
#define __ADD64                 __RV_ADD64
#define __SMBB16                __RV_SMBB16
#define __SMBT16                __RV_SMBT16
#define __SMTT16                __RV_SMTT16
#define __EXPD80                __RV_EXPD80
#define __SMAX8                 __RV_SMAX8
#define __SMAX16                __RV_SMAX16
#define __PKTT16                __RV_PKTT16
#define __KADD16                __RV_KADD16
#define __SADD16                __RV_ADD16
#define __SSUB8                 __RV_KSUB8
#define __SADD8                 __RV_KADD8
#define __USAT16                __RV_UCLIP16
#define __SMALTT                __RV_SMALTT

/** \brief Halfword packing instruction. Combines bits[15:0] of ARG1 with bits[31:16] of ARG2 left-shifted by ARG3.
 *  A shift of 0 is routed to __RV_PKTB16 and a shift of 16 to __RV_PKBB16; any other shift uses the generic
 *  mask-and-or expression. ARG3 is parenthesized in the comparisons so that expression arguments such as
 *  `hi ? 16 : 0` pick the correct branch. Arguments may be evaluated more than once. */
#define __PKHBT(ARG1, ARG2, ARG3)  (((ARG3) == 0) ? __RV_PKTB16((ARG2), (ARG1)) :          \
                                   ((ARG3) == 16) ? __RV_PKBB16((ARG2), (ARG1)) :          \
                                   (((((uint32_t)(ARG1))          ) & 0x0000FFFFUL) |    \
                                   ((((uint32_t)(ARG2)) << (ARG3)) & 0xFFFF0000UL)))

/** \brief Halfword packing instruction. Combines bits[31:16] of ARG1 with bits[15:0] of ARG2 right-shifted by ARG3.
 *  A shift of 0 is routed to __RV_PKTB16 and a shift of 16 to __RV_PKTT16; any other shift uses the generic
 *  mask-and-or expression (logical shift on a uint32_t). ARG3 is parenthesized in the comparisons so that
 *  expression arguments such as `hi ? 16 : 0` pick the correct branch. Arguments may be evaluated more than once. */
#define __PKHTB(ARG1, ARG2, ARG3)  (((ARG3) == 0) ? __RV_PKTB16((ARG1), (ARG2)) :          \
                                   ((ARG3) == 16) ? __RV_PKTT16((ARG1), (ARG2)) :          \
                                   (((((uint32_t)(ARG1))          ) & 0xFFFF0000UL) |    \
                                   ((((uint32_t)(ARG2)) >> (ARG3)) & 0x0000FFFFUL)))

#if __RISCV_XLEN == 64
/** \brief 64-bit halfword packing (RV64 build). In each 32-bit half, combines bits[15:0] of ARG1 with
    bits[31:16] of ARG2 left-shifted by ARG3, then packs the two 32-bit results into one 64-bit value.
    A shift of 0 is routed to __RV_PKTB16 and a shift of 16 to __RV_PKBB16. All macro arguments are
    parenthesized so expression arguments bind correctly, and the generic path works on uint64_t so the
    `<< 32` cannot overflow a signed type. Arguments may be evaluated more than once. */
#define __PKHBT64(ARG1, ARG2, ARG3)  (((ARG3) == 0) ? __RV_PKTB16((ARG2), (ARG1)) :           \
                                   ((ARG3) == 16) ? __RV_PKBB16((ARG2), (ARG1)) :            \
                                   ((uint64_t)((((uint32_t)((uint64_t)(ARG1) >> 32)) & 0x0000FFFFUL) |          \
                                   ((((uint32_t)((uint64_t)(ARG2) >> 32)) << (ARG3)) & 0xFFFF0000UL)) << 32) | \
                                   ((uint64_t)(((((uint32_t)(ARG1))) & 0x0000FFFFUL) |                         \
                                   ((((uint32_t)(ARG2)) << (ARG3)) & 0xFFFF0000UL)) & 0xFFFFFFFFUL))

/** \brief 64-bit halfword packing (RV64 build). In each 32-bit half, combines bits[31:16] of ARG1 with
    bits[15:0] of ARG2 right-shifted by ARG3, then packs the two 32-bit results into one 64-bit value.
    A shift of 0 is routed to __RV_PKTB16 and a shift of 16 to __RV_PKTT16. All macro arguments are
    parenthesized so expression arguments bind correctly. Arguments may be evaluated more than once. */
#define __PKHTB64(ARG1, ARG2, ARG3)  (((ARG3) == 0) ? __RV_PKTB16((ARG1), (ARG2)) :            \
                                   ((ARG3) == 16) ? __RV_PKTT16((ARG1), (ARG2)) :            \
                                   ((uint64_t)(((uint32_t)((uint64_t)(ARG1) >> 32) & 0xFFFF0000UL) |            \
                                   ((((uint32_t)((uint64_t)(ARG2) >> 32)) >> (ARG3)) & 0x0000FFFFUL)) << 32) |  \
                                   ((uint64_t)(((uint32_t)(ARG1) & 0xFFFF0000UL) |                            \
                                   ((((uint32_t)(ARG2)) >> (ARG3)) & 0x0000FFFFUL)) & 0xFFFFFFFFUL))
#else
/** \brief 64-bit halfword packing (non-RV64 build). In each 32-bit half, combines bits[15:0] of ARG1 with
    bits[31:16] of ARG2 left-shifted by ARG3, then packs the two 32-bit results into one 64-bit value.
    A shift of 0 is routed to __RV_DPKTB16 and a shift of 16 to __RV_DPKBB16. All macro arguments are
    parenthesized so expression arguments bind correctly, and the generic path works on uint64_t so the
    `<< 32` cannot overflow a signed type. Arguments may be evaluated more than once. */
#define __PKHBT64(ARG1, ARG2, ARG3)  (((ARG3) == 0) ? __RV_DPKTB16((ARG2), (ARG1)) :           \
                                   ((ARG3) == 16) ? __RV_DPKBB16((ARG2), (ARG1)) :            \
                                   ((uint64_t)((((uint32_t)((uint64_t)(ARG1) >> 32)) & 0x0000FFFFUL) |          \
                                   ((((uint32_t)((uint64_t)(ARG2) >> 32)) << (ARG3)) & 0xFFFF0000UL)) << 32) | \
                                   ((uint64_t)(((((uint32_t)(ARG1))) & 0x0000FFFFUL) |                         \
                                   ((((uint32_t)(ARG2)) << (ARG3)) & 0xFFFF0000UL)) & 0xFFFFFFFFUL))

/** \brief 64-bit halfword packing (non-RV64 build). In each 32-bit half, combines bits[31:16] of ARG1 with
    bits[15:0] of ARG2 right-shifted by ARG3, then packs the two 32-bit results into one 64-bit value.
    A shift of 0 is routed to __RV_DPKTB16 and a shift of 16 to __RV_DPKTT16. All macro arguments are
    parenthesized so expression arguments bind correctly. Arguments may be evaluated more than once. */
#define __PKHTB64(ARG1, ARG2, ARG3)  (((ARG3) == 0) ? __RV_DPKTB16((ARG1), (ARG2)) :            \
                                   ((ARG3) == 16) ? __RV_DPKTT16((ARG1), (ARG2)) :            \
                                   ((uint64_t)(((uint32_t)((uint64_t)(ARG1) >> 32) & 0xFFFF0000UL) |            \
                                   ((((uint32_t)((uint64_t)(ARG2) >> 32)) >> (ARG3)) & 0x0000FFFFUL)) << 32) |  \
                                   ((uint64_t)(((uint32_t)(ARG1) & 0xFFFF0000UL) |                            \
                                   ((((uint32_t)(ARG2)) >> (ARG3)) & 0x0000FFFFUL)) & 0xFFFFFFFFUL))
#endif /* __RISCV_XLEN == 64 */

/** \brief Rotate first, then extract: ARG1 is passed through __ROR(ARG1, ARG2) and the result is handed to
 *  __RV_SUNPKD820. The original note says this form suits the ARM compiler, which can rotate and extract
 *  in a single instruction. */
#define __SXTB16_RORn(ARG1, ARG2)   __RV_SUNPKD820(__ROR(ARG1, ARG2))

#endif /* defined(__DSP_PRESENT) && (__DSP_PRESENT == 1) */

#ifdef __cplusplus
}
#endif

#endif /* __CORE_FEATURE_DSP__ */