Nuclei-Software
/
nuclei-sdk
зеркало из https://github-proxy.rt-thread.io/Nuclei-Software/nuclei-sdk.git


			
							1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007
01702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275
12761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775
17761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242224322442245224622472248224922502251225222532254225522562257225822592260226122622263226422652266226722682269227022712272227322742275
22762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715271627172718271927202721272227232724272527262727272827292730273127322733273427352736273727382739274027412742274327442745274627472748274927502751275227532754275527562757275827592760276127622763276427652766276727682769277027712772277327742775
27762777277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843284428452846284728482849285028512852285328542855285628572858285928602861286228632864286528662867286828692870287128722873287428752876287728782879288028812882288328842885288628872888288928902891289228932894289528962897289828992900290129022903290429052906290729082909291029112912291329142915291629172918291929202921292229232924292529262927292829292930293129322933293429352936293729382939294029412942294329442945294629472948294929502951295229532954295529562957295829592960296129622963296429652966296729682969297029712972297329742975297629772978297929802981298229832984298529862987298829892990299129922993299429952996299729982999300030013002300330043005300630073008300930103011301230133014301530163017301830193020302130223023302430253026302730283029303030313032303330343035303630373038303930403041304230433044304530463047304830493050305130523053305430553056305730583059306030613062306330643065306630673068306930703071307230733074307530763077307830793080308130823083308430853086308730883089309030913092309330943095309630973098309931003101310231033104310531063107310831093110311131123113311431153116311731183119312031213122312331243125312631273128312931303131313231333134313531363137313831393140314131423143314431453146314731483149315031513152315331543155315631573158315931603161316231633164316531663167316831693170317131723173317431753176317731783179318031813182318331843185318631873188318931903191319231933194319531963197319831993200320132023203320432053206320732083209321032113212321332143215321632173218321932203221322232233224322532263227322832293230323132323233323432353236323732383239324032413242324332443245324632473248324932503251325232533254325532563257325832593260326132623263326432653266326732683269327032713272327332743275
32763277327832793280328132823283328432853286328732883289329032913292329332943295329632973298329933003301330233033304330533063307330833093310331133123313331433153316331733183319332033213322332333243325332633273328332933303331333233333334333533363337333833393340334133423343334433453346334733483349335033513352335333543355335633573358335933603361336233633364336533663367336833693370337133723373337433753376337733783379338033813382338333843385338633873388338933903391339233933394339533963397339833993400340134023403340434053406340734083409341034113412341334143415341634173418341934203421342234233424342534263427342834293430343134323433343434353436343734383439344034413442344334443445344634473448344934503451345234533454345534563457345834593460346134623463346434653466346734683469347034713472347334743475347634773478347934803481348234833484348534863487348834893490349134923493349434953496349734983499350035013502350335043505350635073508350935103511351235133514351535163517351835193520352135223523352435253526352735283529353035313532353335343535353635373538353935403541354235433544354535463547354835493550355135523553355435553556355735583559356035613562356335643565356635673568356935703571357235733574357535763577357835793580358135823583358435853586358735883589359035913592359335943595359635973598359936003601360236033604360536063607360836093610361136123613361436153616361736183619362036213622362336243625362636273628362936303631363236333634363536363637363836393640364136423643364436453646364736483649365036513652365336543655365636573658365936603661366236633664366536663667366836693670367136723673367436753676367736783679368036813682368336843685368636873688368936903691369236933694369536963697369836993700370137023703370437053706370737083709371037113712371337143715371637173718371937203721372237233724372537263727372837293730373137323733373437353736373737383739374037413742374337443745374637473748374937503751375237533754375537563757375837593760376137623763376437653766376737683769377037713772377337743775
37763777377837793780378137823783378437853786378737883789379037913792379337943795379637973798379938003801380238033804380538063807380838093810381138123813381438153816381738183819382038213822382338243825382638273828382938303831383238333834383538363837383838393840384138423843384438453846384738483849385038513852385338543855385638573858385938603861386238633864386538663867386838693870387138723873387438753876387738783879388038813882388338843885388638873888388938903891389238933894389538963897389838993900390139023903390439053906390739083909391039113912391339143915391639173918391939203921392239233924392539263927392839293930393139323933393439353936393739383939394039413942394339443945394639473948394939503951395239533954395539563957395839593960396139623963396439653966396739683969397039713972397339743975397639773978397939803981398239833984398539863987398839893990399139923993399439953996399739983999400040014002400340044005400640074008400940104011401240134014401540164017401840194020402140224023402440254026402740284029403040314032403340344035403640374038403940404041404240434044404540464047404840494050405140524053405440554056405740584059406040614062406340644065406640674068406940704071407240734074407540764077407840794080408140824083408440854086408740884089409040914092409340944095409640974098409941004101410241034104410541064107410841094110411141124113411441154116411741184119412041214122412341244125412641274128412941304131413241334134413541364137413841394140414141424143414441454146414741484149415041514152415341544155415641574158415941604161416241634164416541664167416841694170417141724173417441754176417741784179418041814182418341844185418641874188418941904191419241934194419541964197419841994200420142024203420442054206420742084209421042114212421342144215421642174218421942204221422242234224422542264227422842294230423142324233423442354236423742384239424042414242424342444245424642474248424942504251425242534254425542564257425842594260426142624263426442654266426742684269427042714272427342744275
42764277427842794280428142824283428442854286428742884289429042914292429342944295429642974298429943004301430243034304430543064307430843094310431143124313431443154316431743184319432043214322432343244325432643274328432943304331433243334334433543364337433843394340434143424343434443454346434743484349435043514352435343544355435643574358435943604361436243634364436543664367436843694370437143724373437443754376437743784379438043814382438343844385438643874388438943904391439243934394439543964397439843994400440144024403440444054406440744084409441044114412441344144415441644174418441944204421442244234424442544264427442844294430443144324433443444354436443744384439444044414442444344444445444644474448444944504451445244534454445544564457445844594460446144624463446444654466446744684469447044714472447344744475447644774478447944804481448244834484448544864487448844894490449144924493449444954496449744984499450045014502450345044505450645074508450945104511451245134514451545164517451845194520452145224523452445254526452745284529453045314532453345344535453645374538453945404541454245434544454545464547454845494550455145524553455445554556455745584559456045614562456345644565456645674568456945704571457245734574457545764577457845794580458145824583458445854586458745884589459045914592459345944595459645974598459946004601460246034604460546064607460846094610461146124613461446154616461746184619462046214622462346244625462646274628462946304631463246334634463546364637463846394640464146424643464446454646464746484649465046514652465346544655465646574658465946604661466246634664466546664667466846694670467146724673467446754676467746784679468046814682468346844685468646874688468946904691469246934694469546964697469846994700470147024703470447054706470747084709471047114712471347144715471647174718471947204721472247234724472547264727472847294730473147324733473447354736473747384739474047414742474347444745474647474748474947504751475247534754475547564757475847594760476147624763476447654766476747684769477047714772477347744775
47764777477847794780478147824783478447854786478747884789479047914792479347944795479647974798479948004801480248034804480548064807480848094810481148124813481448154816481748184819482048214822482348244825482648274828482948304831483248334834483548364837483848394840484148424843484448454846484748484849485048514852485348544855485648574858485948604861486248634864486548664867486848694870487148724873487448754876487748784879488048814882488348844885488648874888488948904891489248934894489548964897489848994900490149024903490449054906490749084909491049114912491349144915491649174918491949204921492249234924492549264927492849294930493149324933493449354936493749384939494049414942494349444945494649474948494949504951495249534954495549564957495849594960496149624963496449654966496749684969497049714972497349744975497649774978497949804981498249834984498549864987498849894990499149924993499449954996499749984999500050015002500350045005500650075008500950105011501250135014501550165017501850195020502150225023502450255026502750285029503050315032503350345035503650375038503950405041504250435044504550465047504850495050505150525053505450555056505750585059506050615062506350645065506650675068506950705071507250735074507550765077507850795080508150825083508450855086508750885089509050915092509350945095509650975098509951005101510251035104510551065107510851095110511151125113511451155116511751185119512051215122512351245125512651275128512951305131513251335134513551365137513851395140514151425143514451455146514751485149515051515152515351545155515651575158515951605161516251635164516551665167516851695170517151725173517451755176517751785179518051815182518351845185518651875188518951905191519251935194519551965197519851995200520152025203520452055206520752085209521052115212521352145215521652175218521952205221522252235224522552265227522852295230523152325233523452355236523752385239524052415242524352445245524652475248524952505251525252535254525552565257525852595260526152625263526452655266526752685269527052715272527352745275
52765277527852795280528152825283528452855286528752885289529052915292529352945295529652975298529953005301530253035304530553065307530853095310531153125313531453155316531753185319532053215322532353245325532653275328532953305331533253335334533553365337533853395340534153425343534453455346534753485349535053515352535353545355535653575358535953605361536253635364536553665367536853695370537153725373537453755376537753785379538053815382538353845385538653875388538953905391539253935394539553965397539853995400540154025403540454055406540754085409541054115412541354145415541654175418541954205421542254235424542554265427542854295430543154325433543454355436543754385439544054415442544354445445544654475448544954505451545254535454545554565457545854595460546154625463546454655466546754685469547054715472547354745475547654775478547954805481548254835484548554865487548854895490549154925493549454955496549754985499550055015502550355045505550655075508550955105511551255135514551555165517551855195520552155225523552455255526552755285529553055315532553355345535553655375538553955405541554255435544554555465547554855495550555155525553555455555556555755585559556055615562556355645565556655675568556955705571557255735574557555765577557855795580558155825583558455855586558755885589559055915592559355945595559655975598559956005601560256035604560556065607560856095610561156125613561456155616561756185619562056215622562356245625562656275628562956305631563256335634563556365637563856395640564156425643564456455646564756485649565056515652565356545655565656575658565956605661566256635664566556665667566856695670567156725673567456755676567756785679568056815682568356845685568656875688568956905691569256935694569556965697569856995700570157025703570457055706570757085709571057115712571357145715571657175718571957205721572257235724572557265727572857295730573157325733573457355736573757385739574057415742574357445745574657475748574957505751575257535754575557565757575857595760576157625763576457655766576757685769577057715772577357745775
57765777577857795780578157825783578457855786578757885789579057915792579357945795579657975798579958005801580258035804580558065807580858095810581158125813581458155816581758185819582058215822582358245825582658275828582958305831583258335834583558365837583858395840584158425843584458455846584758485849585058515852585358545855585658575858585958605861586258635864586558665867586858695870587158725873587458755876587758785879588058815882588358845885588658875888588958905891589258935894589558965897589858995900590159025903590459055906590759085909591059115912591359145915591659175918591959205921592259235924592559265927592859295930593159325933593459355936593759385939594059415942594359445945594659475948594959505951595259535954595559565957595859595960596159625963596459655966596759685969597059715972597359745975597659775978597959805981598259835984598559865987598859895990599159925993599459955996599759985999600060016002600360046005600660076008600960106011601260136014601560166017601860196020602160226023602460256026602760286029603060316032603360346035603660376038603960406041604260436044604560466047604860496050605160526053605460556056605760586059606060616062606360646065606660676068606960706071607260736074607560766077607860796080608160826083608460856086608760886089609060916092609360946095609660976098609961006101610261036104610561066107610861096110611161126113611461156116611761186119612061216122612361246125612661276128612961306131613261336134613561366137613861396140614161426143614461456146614761486149615061516152615361546155615661576158615961606161616261636164616561666167616861696170617161726173617461756176617761786179618061816182618361846185618661876188618961906191619261936194619561966197619861996200620162026203620462056206620762086209621062116212621362146215621662176218621962206221622262236224622562266227622862296230623162326233623462356236623762386239624062416242624362446245624662476248624962506251625262536254625562566257625862596260626162626263626462656266626762686269627062716272627362746275
62766277627862796280628162826283628462856286628762886289629062916292629362946295629662976298629963006301630263036304630563066307630863096310631163126313631463156316631763186319632063216322632363246325632663276328632963306331633263336334633563366337633863396340634163426343634463456346634763486349635063516352635363546355635663576358635963606361636263636364636563666367636863696370637163726373637463756376637763786379638063816382638363846385638663876388638963906391639263936394639563966397639863996400640164026403640464056406640764086409641064116412641364146415641664176418641964206421642264236424642564266427642864296430643164326433643464356436643764386439644064416442644364446445644664476448644964506451645264536454645564566457645864596460646164626463646464656466646764686469647064716472647364746475647664776478647964806481648264836484648564866487648864896490649164926493649464956496649764986499650065016502650365046505650665076508650965106511651265136514651565166517651865196520652165226523652465256526652765286529653065316532653365346535653665376538653965406541654265436544654565466547654865496550655165526553655465556556655765586559656065616562656365646565656665676568656965706571657265736574657565766577657865796580658165826583658465856586658765886589659065916592659365946595659665976598659966006601660266036604660566066607660866096610661166126613661466156616661766186619662066216622662366246625662666276628662966306631663266336634663566366637663866396640664166426643664466456646664766486649665066516652665366546655665666576658665966606661666266636664666566666667666866696670667166726673667466756676667766786679668066816682668366846685668666876688668966906691669266936694669566966697669866996700670167026703670467056706670767086709671067116712671367146715671667176718671967206721672267236724672567266727672867296730673167326733673467356736673767386739674067416742674367446745674667476748674967506751675267536754675567566757675867596760676167626763676467656766676767686769677067716772677367746775
67766777677867796780678167826783678467856786678767886789679067916792679367946795679667976798679968006801680268036804680568066807680868096810681168126813681468156816681768186819682068216822682368246825682668276828682968306831683268336834683568366837683868396840684168426843684468456846684768486849685068516852685368546855685668576858685968606861686268636864686568666867686868696870687168726873687468756876687768786879688068816882688368846885688668876888688968906891689268936894689568966897689868996900690169026903690469056906690769086909691069116912691369146915691669176918691969206921692269236924692569266927692869296930693169326933693469356936693769386939694069416942694369446945694669476948694969506951695269536954695569566957695869596960696169626963696469656966696769686969697069716972697369746975697669776978697969806981698269836984698569866987698869896990699169926993699469956996699769986999700070017002700370047005700670077008700970107011701270137014701570167017701870197020702170227023702470257026702770287029703070317032703370347035703670377038703970407041704270437044704570467047704870497050705170527053705470557056705770587059706070617062706370647065706670677068706970707071707270737074707570767077707870797080708170827083708470857086708770887089709070917092709370947095709670977098709971007101710271037104710571067107710871097110711171127113711471157116711771187119712071217122712371247125712671277128712971307131713271337134713571367137713871397140714171427143714471457146714771487149715071517152715371547155715671577158715971607161716271637164716571667167716871697170717171727173717471757176717771787179718071817182718371847185718671877188718971907191719271937194719571967197719871997200720172027203720472057206720772087209721072117212721372147215721672177218721972207221722272237224722572267227722872297230723172327233723472357236723772387239724072417242724372447245724672477248724972507251725272537254725572567257725872597260726172627263726472657266726772687269727072717272727372747275
72767277727872797280728172827283728472857286728772887289729072917292729372947295729672977298729973007301730273037304730573067307730873097310731173127313731473157316731773187319732073217322732373247325732673277328732973307331733273337334733573367337733873397340734173427343734473457346734773487349735073517352735373547355735673577358735973607361736273637364736573667367736873697370737173727373737473757376737773787379738073817382738373847385738673877388738973907391739273937394739573967397739873997400740174027403740474057406740774087409741074117412741374147415741674177418741974207421742274237424742574267427742874297430743174327433743474357436743774387439744074417442744374447445744674477448744974507451745274537454745574567457745874597460746174627463746474657466746774687469747074717472747374747475747674777478747974807481748274837484748574867487748874897490749174927493749474957496749774987499750075017502750375047505750675077508750975107511751275137514751575167517751875197520752175227523752475257526752775287529753075317532753375347535753675377538753975407541754275437544754575467547754875497550755175527553755475557556755775587559756075617562756375647565756675677568756975707571757275737574757575767577757875797580758175827583758475857586758775887589759075917592759375947595759675977598759976007601760276037604760576067607760876097610761176127613761476157616761776187619762076217622762376247625762676277628762976307631763276337634763576367637763876397640764176427643764476457646764776487649765076517652765376547655765676577658765976607661766276637664766576667667766876697670767176727673767476757676767776787679768076817682768376847685768676877688768976907691769276937694769576967697769876997700770177027703770477057706770777087709771077117712771377147715771677177718771977207721772277237724772577267727772877297730773177327733773477357736773777387739774077417742774377447745774677477748774977507751775277537754775577567757775877597760776177627763776477657766776777687769777077717772777377747775
77767777777877797780778177827783778477857786778777887789779077917792779377947795779677977798779978007801780278037804780578067807780878097810781178127813781478157816781778187819782078217822782378247825782678277828782978307831783278337834783578367837783878397840784178427843784478457846784778487849785078517852785378547855785678577858785978607861786278637864786578667867786878697870787178727873787478757876787778787879788078817882788378847885788678877888788978907891789278937894789578967897789878997900790179027903790479057906790779087909791079117912791379147915791679177918791979207921792279237924792579267927792879297930793179327933793479357936793779387939794079417942794379447945794679477948794979507951795279537954795579567957795879597960796179627963796479657966796779687969797079717972797379747975797679777978797979807981798279837984798579867987798879897990799179927993799479957996799779987999800080018002800380048005800680078008800980108011801280138014801580168017801880198020802180228023802480258026802780288029803080318032803380348035803680378038803980408041804280438044804580468047804880498050805180528053805480558056805780588059806080618062806380648065806680678068806980708071807280738074807580768077807880798080808180828083808480858086808780888089809080918092809380948095809680978098809981008101810281038104810581068107810881098110811181128113811481158116811781188119812081218122812381248125812681278128812981308131813281338134813581368137813881398140814181428143814481458146814781488149815081518152815381548155815681578158815981608161816281638164816581668167816881698170817181728173817481758176817781788179818081818182818381848185818681878188818981908191819281938194819581968197819881998200820182028203820482058206820782088209821082118212821382148215821682178218821982208221822282238224822582268227822882298230823182328233823482358236823782388239824082418242824382448245824682478248824982508251825282538254825582568257825882598260826182628263826482658266826782688269827082718272827382748275
82768277827882798280828182828283828482858286828782888289829082918292829382948295829682978298829983008301830283038304830583068307830883098310831183128313831483158316831783188319832083218322832383248325832683278328832983308331833283338334833583368337833883398340834183428343834483458346834783488349835083518352835383548355835683578358835983608361836283638364836583668367836883698370837183728373837483758376837783788379838083818382838383848385838683878388838983908391839283938394839583968397839883998400840184028403840484058406840784088409841084118412841384148415841684178418841984208421842284238424842584268427842884298430843184328433843484358436843784388439844084418442844384448445844684478448844984508451845284538454845584568457845884598460846184628463846484658466846784688469847084718472847384748475847684778478847984808481848284838484848584868487848884898490849184928493849484958496849784988499850085018502850385048505850685078508850985108511851285138514851585168517851885198520852185228523852485258526852785288529853085318532853385348535853685378538853985408541854285438544854585468547854885498550855185528553855485558556855785588559856085618562856385648565856685678568856985708571857285738574857585768577857885798580858185828583858485858586858785888589859085918592859385948595859685978598859986008601860286038604860586068607860886098610861186128613861486158616861786188619862086218622862386248625862686278628862986308631863286338634863586368637863886398640864186428643864486458646864786488649865086518652865386548655865686578658865986608661866286638664866586668667866886698670867186728673867486758676867786788679868086818682868386848685868686878688868986908691869286938694869586968697869886998700870187028703870487058706870787088709871087118712871387148715871687178718871987208721872287238724872587268727872887298730873187328733873487358736873787388739874087418742874387448745874687478748874987508751875287538754875587568757875887598760876187628763876487658766876787688769877087718772877387748775
87768777877887798780878187828783878487858786878787888789879087918792879387948795879687978798879988008801880288038804880588068807880888098810881188128813881488158816881788188819882088218822882388248825882688278828882988308831883288338834883588368837883888398840884188428843884488458846884788488849885088518852885388548855885688578858885988608861886288638864886588668867886888698870887188728873887488758876887788788879888088818882888388848885888688878888888988908891889288938894889588968897889888998900890189028903890489058906890789088909891089118912891389148915891689178918891989208921892289238924892589268927892889298930893189328933893489358936893789388939894089418942894389448945894689478948894989508951895289538954895589568957895889598960896189628963896489658966896789688969897089718972897389748975897689778978897989808981898289838984898589868987898889898990899189928993899489958996899789988999900090019002900390049005900690079008900990109011901290139014901590169017901890199020902190229023902490259026902790289029903090319032903390349035903690379038903990409041904290439044904590469047904890499050905190529053905490559056905790589059906090619062906390649065906690679068906990709071907290739074907590769077907890799080908190829083908490859086908790889089909090919092909390949095909690979098909991009101910291039104910591069107910891099110911191129113911491159116911791189119912091219122912391249125912691279128912991309131913291339134913591369137913891399140914191429143914491459146914791489149915091519152915391549155915691579158915991609161916291639164916591669167916891699170917191729173917491759176917791789179918091819182918391849185918691879188918991909191919291939194919591969197919891999200920192029203920492059206920792089209921092119212921392149215921692179218921992209221922292239224922592269227922892299230923192329233923492359236923792389239924092419242924392449245924692479248924992509251925292539254925592569257925892599260926192629263926492659266926792689269927092719272927392749275
92769277927892799280928192829283928492859286928792889289929092919292929392949295929692979298929993009301930293039304930593069307930893099310931193129313931493159316931793189319932093219322932393249325932693279328932993309331933293339334933593369337933893399340934193429343934493459346934793489349935093519352935393549355935693579358935993609361936293639364936593669367936893699370937193729373937493759376937793789379938093819382938393849385938693879388938993909391939293939394939593969397939893999400940194029403940494059406940794089409941094119412941394149415941694179418941994209421942294239424942594269427942894299430943194329433943494359436943794389439944094419442944394449445944694479448944994509451945294539454945594569457945894599460946194629463946494659466946794689469947094719472947394749475947694779478947994809481948294839484948594869487948894899490949194929493949494959496949794989499950095019502950395049505950695079508950995109511951295139514951595169517951895199520952195229523952495259526952795289529953095319532953395349535953695379538953995409541954295439544954595469547954895499550955195529553955495559556955795589559956095619562956395649565956695679568956995709571957295739574957595769577957895799580958195829583958495859586958795889589959095919592959395949595959695979598959996009601960296039604960596069607960896099610961196129613961496159616961796189619962096219622962396249625962696279628962996309631963296339634963596369637963896399640964196429643964496459646964796489649965096519652965396549655965696579658965996609661966296639664966596669667966896699670967196729673967496759676967796789679968096819682968396849685968696879688968996909691969296939694969596969697969896999700970197029703970497059706970797089709971097119712971397149715971697179718971997209721972297239724972597269727972897299730973197329733973497359736973797389739974097419742974397449745974697479748974997509751975297539754975597569757975897599760976197629763976497659766976797689769977097719772977397749775
97769777977897799780978197829783978497859786978797889789979097919792979397949795979697979798979998009801980298039804980598069807980898099810981198129813981498159816981798189819982098219822982398249825982698279828982998309831983298339834983598369837983898399840984198429843984498459846984798489849985098519852985398549855985698579858985998609861986298639864986598669867986898699870987198729873987498759876987798789879988098819882988398849885988698879888988998909891989298939894989598969897989898999900990199029903990499059906990799089909991099119912991399149915991699179918991999209921992299239924992599269927992899299930993199329933993499359936993799389939994099419942994399449945994699479948994999509951995299539954995599569957995899599960996199629963996499659966996799689969997099719972997399749975997699779978997999809981998299839984998599869987998899899990999199929993999499959996999799989999100001000110002100031000410005100061000710008100091001010011100121001310014100151001610017100181001910020100211002210023100241002510026100271002810029100301003110032100331003410035100361003710038100391004010041100421004310044100451004610047100481004910050100511005210053100541005510056100571005810059100601006110062100631006410065100661006710068100691007010071100721007310074100751007610077100781007910080100811008210083100841008510086100871008810089100901009110092100931009410095100961009710098100991010010101101021010310104101051010610107101081010910110101111011210113101141011510116101171011810119101201012110122101231012410125101261012710128101291013010131101321013310134101351013610137101381013910140101411014210143101441014510146101471014810149101501015110152101531015410155101561015710158101591016010161101621016310164101651016610167101681016910170101711017210173101741017510176101771017810179101801018110182101831018410185101861018710188101891019010191101921019310194101951019610197101981019910200102011020210203102041020510206102071020810209102101021110212102131021410215102161021710218102191022
01022110222102231022410225102261022710228102291023010231102321023310234102351023610237102381023910240102411024210243102441024510246102471024810249102501025110252102531025410255102561025710258102591026010261102621026310264102651026610267102681026910270102711027210273102741027510276102771027810279102801028110282102831028410285102861028710288102891029010291102921029310294102951029610297102981029910300103011030210303103041030510306103071030810309103101031110312103131031410315103161031710318103191032010321103221032310324103251032610327103281032910330103311033210333103341033510336103371033810339103401034110342103431034410345103461034710348103491035010351103521035310354103551035610357103581035910360103611036210363103641036510366103671036810369103701037110372103731037410375103761037710378103791038010381103821038310384103851038610387103881038910390103911039210393103941039510396103971039810399104001040110402104031040410405104061040710408104091041010411104121041310414104151041610417104181041910420104211042210423104241042510426104271042810429104301043110432104331043410435104361043710438104391044010441104421044310444104451044610447104481044910450104511045210453104541045510456104571045810459104601046110462104631046410465104661046710468104691047010471104721047310474104751047610477104781047910480104811048210483104841048510486104871048810489104901049110492104931049410495104961049710498104991050010501105021050310504105051050610507105081050910510105111051210513105141051510516105171051810519105201052110522105231052410525105261052710528105291053010531105321053310534105351053610537105381053910540105411054210543105441054510546105471054810549105501055110552105531055410555105561055710558105591056010561105621056310564105651056610567105681056910570105711057210573105741057510576105771057810579105801058110582105831058410585105861058710588105891059010591105921059310594105951059610597105981059910600106011060210603106041060510606106071060810609106101061110612106131061410615106161061710618106191062
01062110622106231062410625106261062710628106291063010631106321063310634106351063610637106381063910640106411064210643106441064510646106471064810649106501065110652106531065410655106561065710658106591066010661106621066310664106651066610667106681066910670106711067210673106741067510676106771067810679106801068110682106831068410685106861068710688106891069010691106921069310694106951069610697106981069910700107011070210703107041070510706107071070810709107101071110712107131071410715107161071710718107191072010721107221072310724107251072610727107281072910730107311073210733107341073510736107371073810739107401074110742107431074410745107461074710748107491075010751107521075310754107551075610757107581075910760107611076210763107641076510766107671076810769107701077110772107731077410775107761077710778107791078010781107821078310784107851078610787107881078910790107911079210793107941079510796107971079810799108001080110802108031080410805108061080710808108091081010811108121081310814108151081610817108181081910820108211082210823108241082510826108271082810829108301083110832108331083410835108361083710838108391084010841108421084310844108451084610847108481084910850108511085210853108541085510856108571085810859108601086110862108631086410865108661086710868108691087010871108721087310874108751087610877108781087910880108811088210883108841088510886108871088810889108901089110892108931089410895108961089710898108991090010901109021090310904109051090610907109081090910910109111091210913109141091510916109171091810919109201092110922109231092410925109261092710928109291093010931109321093310934109351093610937109381093910940109411094210943109441094510946109471094810949109501095110952109531095410955109561095710958109591096010961109621096310964109651096610967109681096910970109711097210973109741097510976109771097810979109801098110982109831098410985109861098710988109891099010991109921099310994109951099610997109981099911000110011100211003110041100511006110071100811009110101101111012110131101411015110161101711018110191102
01102111022110231102411025110261102711028110291103011031110321103311034110351103611037110381103911040110411104211043110441104511046110471104811049110501105111052110531105411055110561105711058110591106011061110621106311064110651106611067110681106911070110711107211073110741107511076110771107811079110801108111082110831108411085110861108711088110891109011091110921109311094110951109611097110981109911100111011110211103111041110511106111071110811109111101111111112111131111411115111161111711118111191112011121111221112311124111251112611127111281112911130111311113211133111341113511136111371113811139111401114111142111431114411145111461114711148111491115011151111521115311154111551115611157111581115911160111611116211163111641116511166111671116811169111701117111172111731117411175111761117711178111791118011181111821118311184111851118611187111881118911190111911119211193111941119511196111971119811199112001120111202112031120411205112061120711208112091121011211112121121311214112151121611217112181121911220112211122211223112241122511226112271122811229112301123111232112331123411235112361123711238112391124011241112421124311244112451124611247112481124911250112511125211253112541125511256112571125811259112601126111262112631126411265112661126711268112691127011271112721127311274112751127611277112781127911280112811128211283112841128511286112871128811289112901129111292112931129411295112961129711298112991130011301113021130311304113051130611307113081130911310113111131211313113141131511316113171131811319113201132111322113231132411325113261132711328113291133011331113321133311334113351133611337113381133911340113411134211343113441134511346113471134811349113501135111352113531135411355113561135711358113591136011361113621136311364113651136611367113681136911370113711137211373113741137511376113771137811379113801138111382113831138411385113861138711388113891139011391113921139311394113951139611397113981139911400114011140211403114041140511406114071140811409114101141111412114131141411415114161141711418114191142
01142111422114231142411425114261142711428114291143011431114321143311434114351143611437114381143911440114411144211443114441144511446114471144811449114501145111452114531145411455114561145711458114591146011461114621146311464114651146611467114681146911470114711147211473114741147511476114771147811479114801148111482114831148411485114861148711488114891149011491114921149311494114951149611497114981149911500115011150211503115041150511506115071150811509115101151111512115131151411515115161151711518115191152011521115221152311524115251152611527115281152911530115311153211533115341153511536115371153811539115401154111542115431154411545115461154711548115491155011551115521155311554115551155611557115581155911560115611156211563115641156511566115671156811569115701157111572115731157411575115761157711578115791158011581115821158311584115851158611587115881158911590115911159211593115941159511596115971159811599116001160111602116031160411605116061160711608116091161011611116121161311614116151161611617116181161911620116211162211623116241162511626116271162811629116301163111632116331163411635116361163711638116391164011641116421164311644116451164611647116481164911650116511165211653116541165511656116571165811659116601166111662116631166411665116661166711668116691167011671116721167311674116751167611677116781167911680116811168211683116841168511686116871168811689116901169111692116931169411695116961169711698116991170011701117021170311704117051170611707117081170911710117111171211713117141171511716117171171811719117201172111722117231172411725117261172711728117291173011731117321173311734117351173611737117381173911740117411174211743117441174511746117471174811749117501175111752117531175411755117561175711758117591176011761117621176311764117651176611767117681176911770117711177211773117741177511776117771177811779117801178111782117831178411785117861178711788117891179011791117921179311794117951179611797117981179911800118011180211803118041180511806118071180811809118101181111812118131181411815118161181711818118191182
01182111822118231182411825118261182711828118291183011831118321183311834118351183611837118381183911840118411184211843118441184511846118471184811849118501185111852118531185411855118561185711858118591186011861118621186311864118651186611867118681186911870118711187211873118741187511876118771187811879118801188111882118831188411885118861188711888118891189011891118921189311894118951189611897118981189911900119011190211903119041190511906119071190811909119101191111912119131191411915119161191711918119191192011921119221192311924119251192611927119281192911930119311193211933119341193511936119371193811939119401194111942119431194411945119461194711948119491195011951119521195311954119551195611957119581195911960119611196211963119641196511966119671196811969119701197111972119731197411975119761197711978119791198011981119821198311984119851198611987119881198911990119911199211993119941199511996119971199811999120001200112002120031200412005120061200712008120091201012011120121201312014120151201612017120181201912020120211202212023120241202512026120271202812029120301203112032120331203412035120361203712038120391204012041120421204312044120451204612047120481204912050120511205212053120541205512056120571205812059120601206112062120631206412065120661206712068120691207012071120721207312074120751207612077120781207912080120811208212083120841208512086120871208812089120901209112092120931209412095120961209712098120991210012101121021210312104121051210612107121081210912110121111211212113121141211512116121171211812119121201212112122121231212412125121261212712128121291213012131121321213312134121351213612137121381213912140121411214212143121441214512146121471214812149121501215112152121531215412155121561215712158121591216012161121621216312164121651216612167121681216912170121711217212173121741217512176121771217812179121801218112182121831218412185121861218712188121891219012191121921219312194121951219612197121981219912200122011220212203122041220512206122071220812209122101221112212122131221412215122161221712218122191222
01222112222122231222412225122261222712228122291223012231122321223312234122351223612237122381223912240122411224212243122441224512246122471224812249122501225112252122531225412255122561225712258122591226012261122621226312264122651226612267122681226912270122711227212273122741227512276122771227812279122801228112282122831228412285122861228712288122891229012291122921229312294122951229612297122981229912300123011230212303123041230512306123071230812309123101231112312123131231412315123161231712318123191232012321123221232312324123251232612327123281232912330123311233212333123341233512336123371233812339123401234112342123431234412345123461234712348123491235012351123521235312354123551235612357123581235912360123611236212363123641236512366123671236812369123701237112372123731237412375123761237712378123791238012381123821238312384123851238612387123881238912390123911239212393123941239512396123971239812399124001240112402124031240412405124061240712408124091241012411124121241312414124151241612417124181241912420124211242212423124241242512426124271242812429124301243112432124331243412435124361243712438124391244012441124421244312444124451244612447124481244912450124511245212453124541245512456124571245812459124601246112462124631246412465124661246712468124691247012471124721247312474124751247612477124781247912480124811248212483124841248512486124871248812489124901249112492124931249412495124961249712498124991250012501125021250312504125051250612507125081250912510125111251212513125141251512516125171251812519125201252112522125231252412525125261252712528125291253012531125321253312534125351253612537125381253912540125411254212543125441254512546125471254812549125501255112552125531255412555125561255712558125591256012561125621256312564125651256612567125681256912570125711257212573125741257512576125771257812579125801258112582125831258412585125861258712588125891259012591125921259312594125951259612597125981259912600126011260212603126041260512606126071260812609126101261112612126131261412615126161261712618126191262
01262112622126231262412625126261262712628126291263012631126321263312634126351263612637126381263912640126411264212643126441264512646126471264812649126501265112652126531265412655126561265712658126591266012661126621266312664126651266612667126681266912670126711267212673126741267512676126771267812679126801268112682126831268412685126861268712688126891269012691126921269312694126951269612697126981269912700127011270212703127041270512706127071270812709127101271112712127131271412715127161271712718127191272012721127221272312724127251272612727127281272912730127311273212733127341273512736127371273812739127401274112742127431274412745127461274712748127491275012751127521275312754127551275612757127581275912760127611276212763127641276512766127671276812769127701277112772127731277412775127761277712778127791278012781127821278312784127851278612787127881278912790127911279212793127941279512796127971279812799128001280112802128031280412805128061280712808128091281012811128121281312814128151281612817128181281912820128211282212823128241282512826128271282812829128301283112832128331283412835128361283712838128391284012841128421284312844128451284612847128481284912850128511285212853128541285512856128571285812859128601286112862128631286412865128661286712868128691287012871128721287312874128751287612877128781287912880128811288212883128841288512886128871288812889128901289112892128931289412895128961289712898128991290012901129021290312904129051290612907129081290912910129111291212913129141291512916129171291812919129201292112922129231292412925129261292712928129291293012931129321293312934129351293612937129381293912940129411294212943129441294512946129471294812949129501295112952129531295412955129561295712958129591296012961129621296312964129651296612967129681296912970129711297212973129741297512976129771297812979129801298112982129831298412985129861298712988129891299012991129921299312994129951299612997129981299913000130011300213003130041300513006130071300813009130101301113012130131301413015130161301713018130191302
01302113022130231302413025130261302713028130291303013031130321303313034130351303613037130381303913040130411304213043130441304513046130471304813049130501305113052130531305413055130561305713058130591306013061130621306313064130651306613067130681306913070130711307213073130741307513076130771307813079130801308113082130831308413085130861308713088130891309013091130921309313094130951309613097130981309913100131011310213103131041310513106131071310813109131101311113112131131311413115131161311713118131191312013121131221312313124131251312613127131281312913130131311313213133131341313513136131371313813139131401314113142131431314413145131461314713148131491315013151131521315313154131551315613157131581315913160131611316213163131641316513166131671316813169131701317113172131731317413175131761317713178131791318013181131821318313184131851318613187131881318913190131911319213193131941319513196131971319813199132001320113202132031320413205132061320713208132091321013211132121321313214132151321613217132181321913220132211322213223132241322513226132271322813229132301323113232132331323413235132361323713238132391324013241132421324313244132451324613247132481324913250132511325213253132541325513256132571325813259132601326113262132631326413265132661326713268132691327013271132721327313274132751327613277132781327913280132811328213283132841328513286132871328813289132901329113292132931329413295132961329713298132991330013301133021330313304133051330613307133081330913310133111331213313133141331513316133171331813319133201332113322133231332413325133261332713328133291333013331133321333313334133351333613337133381333913340133411334213343133441334513346133471334813349133501335113352133531335413355133561335713358133591336013361133621336313364133651336613367133681336913370133711337213373133741337513376133771337813379133801338113382133831338413385133861338713388133891339013391133921339313394133951339613397133981339913400134011340213403134041340513406134071340813409134101341113412134131341413415134161341713418134191342
01342113422134231342413425134261342713428134291343013431134321343313434134351343613437134381343913440134411344213443134441344513446134471344813449134501345113452134531345413455134561345713458134591346013461134621346313464134651346613467134681346913470134711347213473134741347513476134771347813479134801348113482134831348413485134861348713488134891349013491134921349313494134951349613497134981349913500135011350213503135041350513506135071350813509135101351113512135131351413515135161351713518135191352013521135221352313524135251352613527135281352913530135311353213533135341353513536135371353813539135401354113542135431354413545135461354713548135491355013551135521355313554135551355613557135581355913560135611356213563135641356513566135671356813569135701357113572135731357413575135761357713578135791358013581135821358313584135851358613587135881358913590135911359213593135941359513596135971359813599136001360113602136031360413605136061360713608136091361013611136121361313614136151361613617136181361913620136211362213623136241362513626136271362813629136301363113632136331363413635136361363713638136391364013641136421364313644136451364613647136481364913650136511365213653136541365513656136571365813659136601366113662136631366413665136661366713668136691367013671136721367313674136751367613677136781367913680136811368213683136841368513686136871368813689136901369113692136931369413695136961369713698136991370013701137021370313704137051370613707137081370913710137111371213713137141371513716137171371813719137201372113722137231372413725137261372713728137291373013731137321373313734137351373613737137381373913740137411374213743137441374513746137471374813749137501375113752137531375413755137561375713758137591376013761137621376313764137651376613767137681376913770137711377213773137741377513776137771377813779137801378113782137831378413785137861378713788137891379013791137921379313794137951379613797137981379913800138011380213803138041380513806138071380813809138101381113812138131381413815138161381713818138191382
01382113822138231382413825138261382713828138291383013831138321383313834138351383613837138381383913840138411384213843138441384513846138471384813849138501385113852138531385413855138561385713858138591386013861138621386313864138651386613867138681386913870138711387213873138741387513876138771387813879138801388113882138831388413885138861388713888138891389013891138921389313894138951389613897138981389913900139011390213903139041390513906139071390813909139101391113912139131391413915139161391713918139191392013921139221392313924139251392613927139281392913930139311393213933139341393513936139371393813939139401394113942139431394413945139461394713948139491395013951139521395313954139551395613957139581395913960139611396213963139641396513966139671396813969139701397113972139731397413975139761397713978139791398013981139821398313984139851398613987139881398913990139911399213993139941399513996139971399813999140001400114002140031400414005140061400714008140091401014011140121401314014140151401614017140181401914020140211402214023140241402514026140271402814029140301403114032140331403414035140361403714038140391404014041140421404314044140451404614047140481404914050140511405214053140541405514056140571405814059140601406114062140631406414065140661406714068140691407014071140721407314074140751407614077140781407914080140811408214083140841408514086140871408814089140901409114092140931409414095140961409714098140991410014101141021410314104141051410614107141081410914110141111411214113141141411514116141171411814119141201412114122141231412414125141261412714128141291413014131141321413314134141351413614137141381413914140141411414214143141441414514146141471414814149141501415114152141531415414155141561415714158141591416014161141621416314164141651416614167141681416914170141711417214173141741417514176141771417814179141801418114182141831418414185141861418714188141891419014191141921419314194141951419614197141981419914200142011420214203142041420514206142071420814209142101421114212142131421414215142161421714218142191422
01422114222142231422414225142261422714228142291423014231142321423314234142351423614237142381423914240142411424214243142441424514246142471424814249142501425114252142531425414255142561425714258142591426014261142621426314264142651426614267142681426914270142711427214273142741427514276142771427814279142801428114282142831428414285142861428714288142891429014291142921429314294142951429614297142981429914300143011430214303143041430514306143071430814309143101431114312143131431414315143161431714318143191432014321143221432314324143251432614327143281432914330143311433214333143341433514336143371433814339143401434114342143431434414345143461434714348143491435014351143521435314354143551435614357143581435914360143611436214363143641436514366143671436814369143701437114372143731437414375143761437714378143791438014381143821438314384143851438614387143881438914390143911439214393143941439514396143971439814399144001440114402144031440414405144061440714408144091441014411144121441314414144151441614417144181441914420144211442214423144241442514426144271442814429144301443114432144331443414435144361443714438144391444014441144421444314444144451444614447144481444914450144511445214453144541445514456144571445814459144601446114462144631446414465144661446714468144691447014471144721447314474144751447614477144781447914480144811448214483144841448514486144871448814489144901449114492144931449414495144961449714498144991450014501145021450314504145051450614507145081450914510145111451214513145141451514516145171451814519145201452114522145231452414525145261452714528145291453014531145321453314534145351453614537145381453914540145411454214543145441454514546145471454814549145501455114552145531455414555145561455714558145591456014561145621456314564145651456614567145681456914570145711457214573145741457514576145771457814579145801458114582145831458414585145861458714588145891459014591145921459314594145951459614597145981459914600146011460214603146041460514606146071460814609146101461114612146131461414615146161461714618146191462
01462114622146231462414625146261462714628146291463014631146321463314634146351463614637146381463914640146411464214643146441464514646146471464814649146501465114652146531465414655146561465714658146591466014661146621466314664146651466614667146681466914670146711467214673146741467514676146771467814679146801468114682146831468414685146861468714688146891469014691146921469314694146951469614697146981469914700147011470214703147041470514706147071470814709147101471114712147131471414715147161471714718147191472014721147221472314724147251472614727147281472914730147311473214733147341473514736147371473814739147401474114742147431474414745147461474714748147491475014751147521475314754147551475614757147581475914760147611476214763147641476514766147671476814769147701477114772147731477414775147761477714778147791478014781147821478314784147851478614787147881478914790147911479214793147941479514796147971479814799148001480114802148031480414805148061480714808148091481014811148121481314814148151481614817148181481914820148211482214823148241482514826148271482814829148301483114832148331483414835148361483714838148391484014841148421484314844148451484614847148481484914850148511485214853148541485514856148571485814859148601486114862148631486414865148661486714868148691487014871148721487314874148751487614877148781487914880148811488214883148841488514886148871488814889148901489114892148931489414895148961489714898148991490014901149021490314904149051490614907149081490914910149111491214913149141491514916149171491814919149201492114922149231492414925149261492714928149291493014931149321493314934149351493614937149381493914940149411494214943149441494514946149471494814949149501495114952149531495414955149561495714958149591496014961149621496314964149651496614967149681496914970149711497214973149741497514976149771497814979149801498114982149831498414985149861498714988149891499014991149921499314994149951499614997149981499915000150011500215003150041500515006150071500815009150101501115012150131501415015150161501715018150191502
01502115022150231502415025150261502715028150291503015031150321503315034150351503615037150381503915040150411504215043150441504515046150471504815049150501505115052150531505415055150561505715058150591506015061150621506315064150651506615067150681506915070150711507215073150741507515076150771507815079150801508115082150831508415085150861508715088150891509015091150921509315094150951509615097150981509915100151011510215103151041510515106151071510815109151101511115112151131511415115151161511715118151191512015121151221512315124151251512615127151281512915130151311513215133151341513515136151371513815139151401514115142151431514415145151461514715148151491515015151151521515315154151551515615157151581515915160151611516215163151641516515166151671516815169151701517115172151731517415175151761517715178151791518015181151821518315184151851518615187151881518915190151911519215193151941519515196151971519815199152001520115202152031520415205152061520715208152091521015211152121521315214152151521615217152181521915220152211522215223152241522515226152271522815229152301523115232152331523415235152361523715238152391524015241152421524315244152451524615247152481524915250152511525215253152541525515256152571525815259152601526115262152631526415265152661526715268152691527015271152721527315274152751527615277152781527915280152811528215283152841528515286152871528815289152901529115292152931529415295152961529715298152991530015301153021530315304153051530615307153081530915310153111531215313153141531515316153171531815319153201532115322153231532415325153261532715328153291533015331153321533315334153351533615337153381533915340153411534215343153441534515346153471534815349153501535115352153531535415355153561535715358153591536015361153621536315364153651536615367153681536915370153711537215373153741537515376153771537815379153801538115382153831538415385153861538715388153891539015391153921539315394153951539615397153981539915400154011540215403154041540515406154071540815409154101541115412154131541415415154161541715418154191542
01542115422154231542415425154261542715428154291543015431154321543315434154351543615437154381543915440154411544215443154441544515446154471544815449154501545115452154531545415455154561545715458154591546015461154621546315464154651546615467154681546915470154711547215473154741547515476154771547815479154801548115482154831548415485154861548715488154891549015491154921549315494154951549615497154981549915500155011550215503155041550515506155071550815509155101551115512155131551415515155161551715518155191552015521155221552315524155251552615527155281552915530155311553215533155341553515536155371553815539155401554115542155431554415545155461554715548155491555015551155521555315554155551555615557155581555915560155611556215563155641556515566155671556815569155701557115572155731557415575155761557715578155791558015581155821558315584155851558615587155881558915590155911559215593155941559515596155971559815599156001560115602156031560415605156061560715608156091561015611156121561315614156151561615617156181561915620156211562215623156241562515626156271562815629156301563115632156331563415635156361563715638156391564015641156421564315644156451564615647156481564915650156511565215653156541565515656156571565815659156601566115662156631566415665156661566715668156691567015671156721567315674156751567615677156781567915680156811568215683156841568515686156871568815689156901569115692156931569415695156961569715698156991570015701157021570315704157051570615707157081570915710157111571215713157141571515716157171571815719157201572115722157231572415725157261572715728157291573015731157321573315734157351573615737157381573915740157411574215743157441574515746157471574815749157501575115752157531575415755157561575715758157591576015761157621576315764157651576615767157681576915770157711577215773157741577515776157771577815779157801578115782157831578415785157861578715788157891579015791157921579315794157951579615797157981579915800158011580215803158041580515806158071580815809158101581115812158131581415815158161581715818158191582
01582115822158231582415825158261582715828158291583015831158321583315834158351583615837158381583915840158411584215843158441584515846158471584815849158501585115852158531585415855158561585715858158591586015861158621586315864158651586615867158681586915870158711587215873158741587515876158771587815879158801588115882158831588415885158861588715888158891589015891158921589315894158951589615897158981589915900159011590215903159041590515906159071590815909159101591115912159131591415915159161591715918159191592015921159221592315924159251592615927159281592915930159311593215933159341593515936159371593815939159401594115942159431594415945159461594715948159491595015951159521595315954159551595615957159581595915960159611596215963159641596515966159671596815969159701597115972159731597415975159761597715978159791598015981159821598315984159851598615987159881598915990159911599215993159941599515996159971599815999160001600116002160031600416005160061600716008160091601016011160121601316014160151601616017160181601916020160211602216023160241602516026160271602816029160301603116032160331603416035160361603716038160391604016041160421604316044160451604616047160481604916050160511605216053160541605516056160571605816059160601606116062160631606416065160661606716068160691607016071160721607316074160751607616077160781607916080160811608216083160841608516086160871608816089160901609116092160931609416095160961609716098160991610016101161021610316104161051610616107161081610916110161111611216113161141611516116161171611816119161201612116122161231612416125161261612716128161291613016131161321613316134161351613616137161381613916140161411614216143161441614516146161471614816149161501615116152161531615416155161561615716158161591616016161161621616316164161651616616167161681616916170161711617216173161741617516176161771617816179161801618116182161831618416185161861618716188161891619016191161921619316194161951619616197161981619916200162011620216203162041620516206162071620816209162101621116212162131621416215162161621716218162191622
01622116222162231622416225162261622716228162291623016231162321623316234162351623616237162381623916240162411624216243162441624516246162471624816249162501625116252162531625416255162561625716258162591626016261162621626316264162651626616267162681626916270162711627216273162741627516276162771627816279162801628116282162831628416285162861628716288162891629016291162921629316294162951629616297162981629916300163011630216303163041630516306163071630816309163101631116312163131631416315163161631716318163191632016321163221632316324163251632616327163281632916330163311633216333163341633516336163371633816339163401634116342163431634416345163461634716348163491635016351163521635316354163551635616357163581635916360163611636216363163641636516366163671636816369163701637116372163731637416375163761637716378163791638016381163821638316384163851638616387163881638916390163911639216393163941639516396163971639816399164001640116402164031640416405164061640716408164091641016411164121641316414164151641616417164181641916420164211642216423164241642516426164271642816429164301643116432164331643416435164361643716438164391644016441164421644316444164451644616447164481644916450164511645216453164541645516456164571645816459164601646116462164631646416465164661646716468164691647016471164721647316474164751647616477164781647916480164811648216483164841648516486164871648816489164901649116492164931649416495164961649716498164991650016501165021650316504165051650616507165081650916510165111651216513165141651516516165171651816519165201652116522165231652416525165261652716528165291653016531165321653316534165351653616537165381653916540165411654216543165441654516546165471654816549165501655116552165531655416555165561655716558165591656016561165621656316564165651656616567165681656916570165711657216573165741657516576165771657816579165801658116582165831658416585165861658716588165891659016591165921659316594165951659616597165981659916600166011660216603166041660516606166071660816609166101661116612166131661416615166161661716618166191662
01662116622166231662416625166261662716628166291663016631166321663316634166351663616637166381663916640166411664216643166441664516646166471664816649166501665116652166531665416655166561665716658166591666016661166621666316664166651666616667166681666916670166711667216673166741667516676166771667816679166801668116682166831668416685166861668716688166891669016691166921669316694166951669616697166981669916700167011670216703167041670516706167071670816709167101671116712167131671416715167161671716718167191672016721167221672316724167251672616727167281672916730167311673216733167341673516736167371673816739167401674116742167431674416745167461674716748167491675016751167521675316754167551675616757167581675916760167611676216763167641676516766167671676816769167701677116772167731677416775167761677716778167791678016781167821678316784167851678616787167881678916790167911679216793167941679516796167971679816799168001680116802168031680416805168061680716808168091681016811168121681316814168151681616817168181681916820168211682216823168241682516826168271682816829168301683116832168331683416835168361683716838168391684016841168421684316844168451684616847168481684916850168511685216853168541685516856168571685816859168601686116862168631686416865168661686716868168691687016871168721687316874168751687616877168781687916880168811688216883168841688516886168871688816889168901689116892168931689416895168961689716898168991690016901169021690316904169051690616907169081690916910169111691216913169141691516916169171691816919169201692116922169231692416925169261692716928169291693016931169321693316934169351693616937169381693916940169411694216943169441694516946169471694816949169501695116952169531695416955169561695716958169591696016961169621696316964169651696616967169681696916970169711697216973169741697516976169771697816979169801698116982169831698416985169861698716988169891699016991169921699316994169951699616997169981699917000170011700217003170041700517006170071700817009170101701117012170131701417015170161701717018170191702
01702117022170231702417025170261702717028170291703017031170321703317034170351703617037170381703917040170411704217043170441704517046170471704817049170501705117052170531705417055170561705717058170591706017061170621706317064170651706617067170681706917070170711707217073170741707517076170771707817079170801708117082170831708417085170861708717088170891709017091170921709317094170951709617097170981709917100171011710217103171041710517106171071710817109171101711117112171131711417115171161711717118171191712017121171221712317124171251712617127171281712917130171311713217133171341713517136171371713817139171401714117142171431714417145171461714717148171491715017151171521715317154171551715617157171581715917160171611716217163171641716517166171671716817169171701717117172171731717417175171761717717178171791718017181171821718317184171851718617187171881718917190171911719217193171941719517196171971719817199172001720117202172031720417205172061720717208172091721017211172121721317214172151721617217172181721917220172211722217223172241722517226172271722817229172301723117232172331723417235172361723717238172391724017241172421724317244172451724617247172481724917250172511725217253172541725517256172571725817259172601726117262172631726417265172661726717268172691727017271172721727317274172751727617277172781727917280172811728217283172841728517286172871728817289172901729117292172931729417295172961729717298172991730017301173021730317304173051730617307173081730917310173111731217313173141731517316173171731817319173201732117322173231732417325173261732717328173291733017331173321733317334173351733617337173381733917340173411734217343173441734517346173471734817349173501735117352173531735417355173561735717358173591736017361173621736317364173651736617367173681736917370173711737217373173741737517376173771737817379173801738117382173831738417385173861738717388173891739017391173921739317394173951739617397173981739917400174011740217403174041740517406174071740817409174101741117412174131741417415174161741717418174191742
01742117422174231742417425174261742717428174291743017431174321743317434174351743617437174381743917440174411744217443174441744517446174471744817449174501745117452174531745417455174561745717458174591746017461174621746317464174651746617467174681746917470174711747217473174741747517476174771747817479174801748117482174831748417485174861748717488174891749017491174921749317494174951749617497174981749917500175011750217503175041750517506175071750817509175101751117512175131751417515175161751717518175191752017521175221752317524175251752617527175281752917530175311753217533175341753517536175371753817539175401754117542175431754417545175461754717548175491755017551175521755317554175551755617557175581755917560175611756217563175641756517566175671756817569175701757117572175731757417575175761757717578175791758017581175821758317584175851758617587175881758917590175911759217593175941759517596175971759817599176001760117602176031760417605176061760717608176091761017611176121761317614176151761617617176181761917620176211762217623176241762517626176271762817629176301763117632176331763417635176361763717638176391764017641176421764317644176451764617647176481764917650176511765217653176541765517656176571765817659176601766117662176631766417665176661766717668176691767017671176721767317674176751767617677176781767917680176811768217683176841768517686176871768817689176901769117692176931769417695176961769717698176991770017701177021770317704177051770617707177081770917710177111771217713177141771517716177171771817719177201772117722177231772417725177261772717728177291773017731177321773317734177351773617737177381773917740177411774217743177441774517746177471774817749177501775117752177531775417755177561775717758177591776017761177621776317764177651776617767177681776917770177711777217773177741777517776177771777817779177801778117782177831778417785177861778717788177891779017791177921779317794177951779617797177981779917800178011780217803178041780517806178071780817809178101781117812178131781417815178161781717818178191782
01782117822178231782417825178261782717828178291783017831178321783317834178351783617837178381783917840178411784217843178441784517846178471784817849178501785117852178531785417855178561785717858178591786017861178621786317864178651786617867178681786917870178711787217873178741787517876178771787817879178801788117882178831788417885178861788717888178891789017891178921789317894178951789617897178981789917900179011790217903179041790517906179071790817909179101791117912179131791417915179161791717918179191792017921179221792317924179251792617927179281792917930179311793217933179341793517936179371793817939179401794117942179431794417945179461794717948179491795017951179521795317954179551795617957179581795917960179611796217963179641796517966179671796817969179701797117972179731797417975179761797717978179791798017981179821798317984179851798617987179881798917990179911799217993179941799517996179971799817999180001800118002180031800418005180061800718008180091801018011180121801318014180151801618017180181801918020180211802218023180241802518026180271802818029180301803118032180331803418035180361803718038180391804018041180421804318044180451804618047180481804918050180511805218053180541805518056180571805818059180601806118062180631806418065180661806718068180691807018071180721807318074180751807618077180781807918080180811808218083180841808518086180871808818089180901809118092180931809418095180961809718098180991810018101181021810318104181051810618107181081810918110181111811218113181141811518116181171811818119181201812118122181231812418125181261812718128181291813018131181321813318134181351813618137181381813918140181411814218143181441814518146181471814818149181501815118152181531815418155181561815718158181591816018161181621816318164181651816618167181681816918170181711817218173181741817518176181771817818179181801818118182181831818418185181861818718188181891819018191181921819318194181951819618197181981819918200182011820218203182041820518206182071820818209182101821118212182131821418215182161821718218182191822
01822118222182231822418225182261822718228182291823018231182321823318234182351823618237182381823918240182411824218243182441824518246182471824818249182501825118252182531825418255182561825718258182591826018261182621826318264182651826618267182681826918270182711827218273182741827518276182771827818279182801828118282182831828418285182861828718288182891829018291182921829318294182951829618297182981829918300183011830218303183041830518306183071830818309183101831118312183131831418315183161831718318183191832018321183221832318324183251832618327183281832918330183311833218333183341833518336183371833818339183401834118342183431834418345183461834718348183491835018351183521835318354183551835618357183581835918360183611836218363183641836518366183671836818369183701837118372183731837418375183761837718378183791838018381183821838318384183851838618387183881838918390183911839218393183941839518396183971839818399184001840118402184031840418405184061840718408184091841018411184121841318414184151841618417184181841918420184211842218423184241842518426184271842818429184301843118432184331843418435184361843718438184391844018441184421844318444184451844618447184481844918450184511845218453184541845518456184571845818459184601846118462184631846418465184661846718468184691847018471184721847318474184751847618477184781847918480184811848218483184841848518486184871848818489184901849118492184931849418495184961849718498184991850018501185021850318504185051850618507185081850918510185111851218513185141851518516185171851818519185201852118522185231852418525185261852718528185291853018531185321853318534185351853618537185381853918540185411854218543185441854518546185471854818549185501855118552185531855418555185561855718558185591856018561185621856318564185651856618567185681856918570185711857218573185741857518576185771857818579185801858118582185831858418585185861858718588185891859018591185921859318594185951859618597185981859918600186011860218603186041860518606186071860818609186101861118612186131861418615186161861718618186191862
01862118622186231862418625186261862718628186291863018631186321863318634186351863618637186381863918640186411864218643186441864518646186471864818649186501865118652186531865418655186561865718658186591866018661186621866318664186651866618667186681866918670186711867218673186741867518676186771867818679186801868118682186831868418685186861868718688186891869018691186921869318694186951869618697186981869918700187011870218703187041870518706187071870818709187101871118712187131871418715187161871718718187191872018721187221872318724187251872618727187281872918730187311873218733187341873518736187371873818739187401874118742187431874418745187461874718748187491875018751187521875318754187551875618757187581875918760187611876218763187641876518766187671876818769187701877118772187731877418775187761877718778187791878018781187821878318784187851878618787187881878918790187911879218793187941879518796187971879818799188001880118802188031880418805188061880718808188091881018811188121881318814188151881618817188181881918820188211882218823188241882518826188271882818829188301883118832188331883418835188361883718838188391884018841188421884318844188451884618847188481884918850188511885218853188541885518856188571885818859188601886118862188631886418865188661886718868188691887018871188721887318874188751887618877188781887918880188811888218883188841888518886188871888818889188901889118892188931889418895188961889718898188991890018901189021890318904189051890618907189081890918910189111891218913189141891518916189171891818919189201892118922189231892418925189261892718928189291893018931189321893318934189351893618937189381893918940189411894218943189441894518946189471894818949189501895118952189531895418955189561895718958189591896018961189621896318964189651896618967189681896918970189711897218973189741897518976189771897818979189801898118982189831898418985189861898718988189891899018991189921899318994189951899618997189981899919000190011900219003190041900519006190071900819009190101901119012190131901419015190161901719018190191902
01902119022190231902419025190261902719028190291903019031190321903319034190351903619037190381903919040190411904219043190441904519046190471904819049190501905119052190531905419055190561905719058190591906019061190621906319064190651906619067190681906919070190711907219073190741907519076190771907819079190801908119082190831908419085190861908719088190891909019091190921909319094190951909619097190981909919100191011910219103191041910519106191071910819109191101911119112191131911419115191161911719118191191912019121191221912319124191251912619127191281912919130191311913219133191341913519136191371913819139191401914119142191431914419145191461914719148191491915019151191521915319154191551915619157191581915919160191611916219163191641916519166191671916819169191701917119172191731917419175191761917719178191791918019181191821918319184191851918619187191881918919190191911919219193191941919519196191971919819199192001920119202192031920419205192061920719208192091921019211192121921319214192151921619217192181921919220192211922219223192241922519226192271922819229192301923119232192331923419235192361923719238192391924019241192421924319244192451924619247192481924919250192511925219253192541925519256192571925819259192601926119262192631926419265192661926719268192691927019271192721927319274192751927619277192781927919280192811928219283192841928519286192871928819289192901929119292192931929419295192961929719298192991930019301193021930319304193051930619307193081930919310193111931219313193141931519316193171931819319193201932119322193231932419325193261932719328193291933019331193321933319334193351933619337193381933919340193411934219343193441934519346193471934819349193501935119352193531935419355193561935719358193591936019361193621936319364193651936619367193681936919370193711937219373193741937519376193771937819379193801938119382193831938419385193861938719388193891939019391193921939319394193951939619397193981939919400194011940219403194041940519406194071940819409194101941119412194131941419415194161941719418194191942
01942119422194231942419425194261942719428194291943019431194321943319434194351943619437194381943919440194411944219443194441944519446194471944819449194501945119452194531945419455194561945719458194591946019461194621946319464194651946619467194681946919470194711947219473194741947519476194771947819479194801948119482194831948419485194861948719488194891949019491194921949319494194951949619497194981949919500195011950219503195041950519506195071950819509195101951119512195131951419515195161951719518195191952019521195221952319524195251952619527195281952919530195311953219533195341953519536195371953819539195401954119542195431954419545195461954719548195491955019551195521955319554195551955619557195581955919560195611956219563195641956519566195671956819569195701957119572195731957419575195761957719578195791958019581195821958319584195851958619587195881958919590195911959219593195941959519596195971959819599196001960119602196031960419605196061960719608196091961019611196121961319614196151961619617196181961919620196211962219623196241962519626196271962819629196301963119632196331963419635196361963719638196391964019641196421964319644196451964619647196481964919650196511965219653196541965519656196571965819659196601966119662196631966419665196661966719668196691967019671196721967319674196751967619677196781967919680196811968219683196841968519686196871968819689196901969119692196931969419695196961969719698196991970019701197021970319704197051970619707197081970919710197111971219713197141971519716197171971819719197201972119722197231972419725197261972719728197291973019731197321973319734197351973619737197381973919740197411974219743197441974519746197471974819749197501975119752197531975419755197561975719758197591976019761197621976319764197651976619767197681976919770197711977219773197741977519776197771977819779197801978119782197831978419785197861978719788197891979019791197921979319794197951979619797197981979919800198011980219803198041980519806198071980819809198101981119812198131981419815198161981719818198191982
01982119822198231982419825198261982719828198291983019831198321983319834198351983619837198381983919840198411984219843198441984519846198471984819849198501985119852198531985419855198561985719858198591986019861198621986319864198651986619867198681986919870198711987219873198741987519876198771987819879198801988119882198831988419885198861988719888198891989019891198921989319894198951989619897198981989919900199011990219903199041990519906199071990819909199101991119912199131991419915199161991719918199191992019921199221992319924199251992619927199281992919930199311993219933199341993519936199371993819939199401994119942199431994419945199461994719948199491995019951199521995319954199551995619957199581995919960199611996219963199641996519966199671996819969199701997119972199731997419975199761997719978199791998019981199821998319984199851998619987199881998919990199911999219993199941999519996199971999819999200002000120002200032000420005200062000720008200092001020011200122001320014200152001620017200182001920020200212002220023200242002520026200272002820029200302003120032200332003420035200362003720038200392004020041200422004320044200452004620047200482004920050200512005220053200542005520056200572005820059200602006120062200632006420065200662006720068200692007020071200722007320074200752007620077200782007920080200812008220083200842008520086200872008820089200902009120092200932009420095200962009720098200992010020101201022010320104201052010620107201082010920110201112011220113201142011520116201172011820119201202012120122201232012420125201262012720128201292013020131201322013320134201352013620137201382013920140201412014220143201442014520146201472014820149201502015120152201532015420155201562015720158201592016020161201622016320164201652016620167201682016920170201712017220173201742017520176201772017820179201802018120182201832018420185201862018720188201892019020191201922019320194201952019620197201982019920200202012020220203202042020520206202072020820209202102021120212202132021420215202162021720218202192022
02022120222202232022420225202262022720228202292023020231202322023320234202352023620237202382023920240202412024220243202442024520246202472024820249202502025120252202532025420255202562025720258202592026020261202622026320264202652026620267202682026920270202712027220273202742027520276202772027820279202802028120282202832028420285202862028720288202892029020291202922029320294202952029620297202982029920300203012030220303203042030520306203072030820309203102031120312203132031420315203162031720318203192032020321203222032320324203252032620327203282032920330203312033220333203342033520336203372033820339203402034120342203432034420345203462034720348203492035020351203522035320354203552035620357203582035920360203612036220363203642036520366203672036820369203702037120372203732037420375203762037720378203792038020381203822038320384203852038620387203882038920390203912039220393203942039520396203972039820399204002040120402204032040420405204062040720408204092041020411204122041320414204152041620417204182041920420204212042220423204242042520426204272042820429204302043120432204332043420435204362043720438204392044020441204422044320444204452044620447204482044920450204512045220453204542045520456204572045820459204602046120462204632046420465204662046720468204692047020471204722047320474204752047620477204782047920480204812048220483204842048520486204872048820489204902049120492204932049420495204962049720498204992050020501205022050320504205052050620507205082050920510205112051220513205142051520516205172051820519205202052120522205232052420525205262052720528205292053020531205322053320534205352053620537205382053920540205412054220543205442054520546205472054820549205502055120552205532055420555205562055720558205592056020561205622056320564205652056620567205682056920570205712057220573205742057520576205772057820579205802058120582205832058420585205862058720588205892059020591205922059320594205952059620597205982059920600206012060220603206042060520606206072060820609206102061120612206132061420615206162061720618206192062
02062120622206232062420625206262062720628206292063020631206322063320634206352063620637206382063920640206412064220643206442064520646206472064820649206502065120652206532065420655206562065720658206592066020661206622066320664206652066620667206682066920670206712067220673206742067520676206772067820679206802068120682206832068420685206862068720688206892069020691206922069320694206952069620697206982069920700207012070220703207042070520706207072070820709207102071120712207132071420715207162071720718207192072020721207222072320724207252072620727207282072920730207312073220733207342073520736207372073820739207402074120742207432074420745207462074720748207492075020751207522075320754207552075620757207582075920760207612076220763207642076520766207672076820769207702077120772207732077420775207762077720778207792078020781207822078320784207852078620787207882078920790207912079220793207942079520796207972079820799208002080120802208032080420805208062080720808208092081020811208122081320814208152081620817208182081920820208212082220823208242082520826208272082820829208302083120832208332083420835208362083720838208392084020841208422084320844208452084620847208482084920850208512085220853208542085520856208572085820859208602086120862208632086420865208662086720868208692087020871208722087320874208752087620877208782087920880208812088220883208842088520886208872088820889208902089120892208932089420895208962089720898208992090020901209022090320904209052090620907209082090920910209112091220913209142091520916209172091820919209202092120922209232092420925209262092720928209292093020931209322093320934209352093620937209382093920940209412094220943209442094520946209472094820949209502095120952209532095420955209562095720958209592096020961209622096320964209652096620967209682096920970209712097220973209742097520976209772097820979209802098120982209832098420985209862098720988209892099020991209922099320994209952099620997209982099921000210012100221003210042100521006210072100821009210102101121012210132101421015210162101721018210192102
02102121022210232102421025210262102721028210292103021031210322103321034210352103621037210382103921040210412104221043210442104521046210472104821049210502105121052210532105421055210562105721058210592106021061210622106321064210652106621067210682106921070210712107221073210742107521076210772107821079210802108121082210832108421085210862108721088210892109021091210922109321094210952109621097210982109921100211012110221103211042110521106211072110821109211102111121112211132111421115211162111721118211192112021121211222112321124211252112621127211282112921130211312113221133211342113521136211372113821139211402114121142211432114421145211462114721148211492115021151211522115321154211552115621157211582115921160211612116221163211642116521166211672116821169211702117121172211732117421175211762117721178211792118021181211822118321184211852118621187211882118921190211912119221193211942119521196211972119821199212002120121202212032120421205212062120721208212092121021211212122121321214212152121621217212182121921220212212122221223212242122521226212272122821229212302123121232212332123421235212362123721238212392124021241212422124321244212452124621247212482124921250212512125221253212542125521256212572125821259212602126121262212632126421265212662126721268212692127021271212722127321274212752127621277212782127921280212812128221283212842128521286212872128821289212902129121292212932129421295212962129721298212992130021301213022130321304213052130621307213082130921310213112131221313213142131521316213172131821319213202132121322213232132421325213262132721328213292133021331213322133321334213352133621337213382133921340213412134221343213442134521346213472134821349213502135121352213532135421355213562135721358213592136021361213622136321364213652136621367213682136921370213712137221373213742137521376213772137821379213802138121382213832138421385213862138721388213892139021391213922139321394213952139621397213982139921400214012140221403214042140521406214072140821409214102141121412214132141421415214162141721418214192142
02142121422214232142421425214262142721428214292143021431214322143321434214352143621437214382143921440214412144221443214442144521446214472144821449214502145121452214532145421455214562145721458214592146021461214622146321464214652146621467214682146921470214712147221473214742147521476214772147821479214802148121482214832148421485214862148721488214892149021491214922149321494214952149621497214982149921500215012150221503215042150521506215072150821509215102151121512215132151421515215162151721518215192152021521215222152321524215252152621527215282152921530215312153221533215342153521536215372153821539215402154121542215432154421545215462154721548215492155021551215522155321554215552155621557215582155921560215612156221563215642156521566215672156821569215702157121572215732157421575215762157721578215792158021581215822158321584215852158621587215882158921590215912159221593215942159521596215972159821599216002160121602216032160421605216062160721608216092161021611216122161321614216152161621617216182161921620216212162221623216242162521626216272162821629216302163121632216332163421635216362163721638216392164021641216422164321644216452164621647216482164921650216512165221653216542165521656216572165821659216602166121662216632166421665216662166721668216692167021671216722167321674216752167621677216782167921680216812168221683216842168521686216872168821689216902169121692216932169421695216962169721698216992170021701217022170321704217052170621707217082170921710217112171221713217142171521716217172171821719217202172121722217232172421725217262172721728217292173021731217322173321734217352173621737217382173921740217412174221743217442174521746217472174821749217502175121752217532175421755217562175721758217592176021761217622176321764217652176621767217682176921770217712177221773217742177521776217772177821779217802178121782217832178421785217862178721788217892179021791217922179321794217952179621797217982179921800218012180221803218042180521806218072180821809218102181121812218132181421815218162181721818218192182
02182121822218232182421825218262182721828218292183021831218322183321834218352183621837218382183921840218412184221843218442184521846218472184821849218502185121852218532185421855218562185721858218592186021861218622186321864218652186621867218682186921870218712187221873218742187521876218772187821879218802188121882218832188421885218862188721888218892189021891218922189321894218952189621897218982189921900219012190221903219042190521906219072190821909219102191121912219132191421915219162191721918219192192021921219222192321924219252192621927219282192921930219312193221933219342193521936219372193821939219402194121942219432194421945219462194721948219492195021951219522195321954219552195621957219582195921960219612196221963219642196521966219672196821969219702197121972219732197421975219762197721978219792198021981219822198321984219852198621987219882198921990219912199221993219942199521996219972199821999220002200122002220032200422005220062200722008220092201022011220122201322014220152201622017220182201922020220212202222023220242202522026220272202822029220302203122032220332203422035220362203722038220392204022041220422204322044220452204622047220482204922050220512205222053220542205522056220572205822059220602206122062220632206422065220662206722068220692207022071220722207322074220752207622077220782207922080220812208222083220842208522086220872208822089220902209122092220932209422095220962209722098220992210022101221022210322104221052210622107221082210922110221112211222113221142211522116221172211822119221202212122122221232212422125221262212722128221292213022131221322213322134221352213622137221382213922140221412214222143221442214522146221472214822149221502215122152221532215422155221562215722158221592216022161221622216322164221652216622167221682216922170221712217222173221742217522176221772217822179221802218122182221832218422185221862218722188221892219022191221922219322194221952219622197221982219922200222012220222203222042220522206222072220822209222102221122212222132221422215222162221722218222192222
02222122222222232222422225222262222722228222292223022231222322223322234222352223622237222382223922240222412224222243222442224522246222472224822249222502225122252222532225422255222562225722258222592226022261222622226322264222652226622267222682226922270222712227222273222742227522276222772227822279222802228122282222832228422285222862228722288222892229022291222922229322294222952229622297222982229922300223012230222303223042230522306223072230822309223102231122312223132231422315223162231722318223192232022321223222232322324223252232622327223282232922330223312233222333223342233522336223372233822339223402234122342223432234422345223462234722348223492235022351223522235322354223552235622357223582235922360223612236222363223642236522366223672236822369223702237122372223732237422375223762237722378223792238022381223822238322384223852238622387223882238922390223912239222393223942239522396223972239822399224002240122402224032240422405224062240722408224092241022411224122241322414224152241622417224182241922420224212242222423224242242522426224272242822429224302243122432224332243422435224362243722438224392244022441224422244322444224452244622447224482244922450224512245222453224542245522456224572245822459224602246122462224632246422465224662246722468224692247022471224722247322474224752247622477224782247922480224812248222483224842248522486224872248822489224902249122492224932249422495224962249722498224992250022501225022250322504225052250622507225082250922510225112251222513225142251522516225172251822519225202252122522225232252422525225262252722528225292253022531225322253322534225352253622537225382253922540225412254222543225442254522546225472254822549225502255122552225532255422555225562255722558225592256022561225622256322564225652256622567225682256922570225712257222573225742257522576225772257822579225802258122582225832258422585225862258722588225892259022591225922259322594225952259622597225982259922600226012260222603226042260522606226072260822609226102261122612226132261422615226162261722618226192262
02262122622226232262422625226262262722628226292263022631226322263322634226352263622637226382263922640226412264222643226442264522646226472264822649226502265122652226532265422655226562265722658226592266022661226622266322664226652266622667226682266922670226712267222673226742267522676226772267822679226802268122682226832268422685226862268722688226892269022691226922269322694226952269622697226982269922700227012270222703227042270522706227072270822709227102271122712227132271422715227162271722718227192272022721227222272322724227252272622727227282272922730227312273222733227342273522736227372273822739227402274122742227432274422745227462274722748227492275022751227522275322754227552275622757227582275922760227612276222763227642276522766227672276822769227702277122772227732277422775227762277722778227792278022781227822278322784227852278622787227882278922790227912279222793227942279522796227972279822799228002280122802228032280422805228062280722808228092281022811228122281322814228152281622817228182281922820228212282222823228242282522826228272282822829228302283122832228332283422835228362283722838228392284022841228422284322844228452284622847228482284922850228512285222853228542285522856228572285822859228602286122862228632286422865228662286722868228692287022871228722287322874228752287622877228782287922880228812288222883228842288522886228872288822889228902289122892228932289422895228962289722898228992290022901229022290322904229052290622907229082290922910229112291222913229142291522916229172291822919229202292122922229232292422925229262292722928229292293022931229322293322934229352293622937229382293922940229412294222943229442294522946229472294822949229502295122952229532295422955229562295722958229592296022961229622296322964229652296622967229682296922970229712297222973229742297522976229772297822979229802298122982229832298422985229862298722988229892299022991229922299322994229952299622997229982299923000230012300223003230042300523006230072300823009230102301123012230132301423015230162301723018230192302
02302123022230232302423025230262302723028230292303023031230322303323034230352303623037230382303923040230412304223043230442304523046230472304823049230502305123052230532305423055230562305723058230592306023061230622306323064230652306623067230682306923070230712307223073230742307523076230772307823079230802308123082230832308423085230862308723088230892309023091230922309323094230952309623097230982309923100231012310223103231042310523106231072310823109231102311123112231132311423115231162311723118231192312023121231222312323124231252312623127231282312923130231312313223133231342313523136231372313823139231402314123142231432314423145231462314723148231492315023151231522315323154231552315623157231582315923160231612316223163231642316523166231672316823169231702317123172231732317423175231762317723178231792318023181231822318323184231852318623187231882318923190231912319223193231942319523196231972319823199232002320123202232032320423205232062320723208232092321023211232122321323214232152321623217232182321923220232212322223223232242322523226232272322823229232302323123232232332323423235232362323723238232392324023241232422324323244232452324623247232482324923250232512325223253232542325523256232572325823259232602326123262232632326423265232662326723268232692327023271232722327323274232752327623277232782327923280232812328223283232842328523286232872328823289232902329123292232932329423295232962329723298232992330023301233022330323304233052330623307233082330923310233112331223313233142331523316233172331823319233202332123322233232332423325233262332723328233292333023331233322333323334233352333623337233382333923340233412334223343233442334523346233472334823349233502335123352233532335423355233562335723358233592336023361233622336323364233652336623367233682336923370233712337223373233742337523376233772337823379233802338123382233832338423385233862338723388233892339023391233922339323394233952339623397233982339923400234012340223403234042340523406234072340823409234102341123412234132341423415234162341723418234192342
02342123422234232342423425234262342723428234292343023431234322343323434234352343623437234382343923440234412344223443234442344523446234472344823449234502345123452234532345423455234562345723458234592346023461234622346323464234652346623467234682346923470234712347223473234742347523476234772347823479234802348123482234832348423485234862348723488234892349023491234922349323494234952349623497234982349923500235012350223503235042350523506235072350823509235102351123512235132351423515235162351723518235192352023521235222352323524235252352623527235282352923530235312353223533235342353523536235372353823539235402354123542235432354423545235462354723548235492355023551235522355323554235552355623557235582355923560235612356223563235642356523566235672356823569235702357123572235732357423575235762357723578235792358023581235822358323584235852358623587235882358923590235912359223593235942359523596235972359823599236002360123602236032360423605236062360723608236092361023611236122361323614236152361623617236182361923620236212362223623236242362523626236272362823629236302363123632236332363423635236362363723638236392364023641236422364323644236452364623647236482364923650236512365223653236542365523656236572365823659236602366123662236632366423665236662366723668236692367023671236722367323674236752367623677236782367923680236812368223683236842368523686236872368823689236902369123692236932369423695236962369723698236992370023701237022370323704237052370623707237082370923710237112371223713237142371523716237172371823719237202372123722237232372423725237262372723728237292373023731237322373323734237352373623737237382373923740237412374223743237442374523746237472374823749237502375123752237532375423755237562375723758237592376023761237622376323764237652376623767237682376923770237712377223773237742377523776237772377823779237802378123782237832378423785237862378723788237892379023791237922379323794237952379623797237982379923800238012380223803238042380523806238072380823809238102381123812238132381423815238162381723818238192382
02382123822238232382423825238262382723828238292383023831238322383323834238352383623837238382383923840238412384223843238442384523846238472384823849238502385123852238532385423855238562385723858238592386023861238622386323864238652386623867238682386923870238712387223873238742387523876238772387823879238802388123882238832388423885238862388723888238892389023891238922389323894238952389623897238982389923900239012390223903239042390523906239072390823909239102391123912239132391423915239162391723918239192392023921239222392323924239252392623927239282392923930239312393223933239342393523936239372393823939239402394123942239432394423945239462394723948239492395023951239522395323954239552395623957239582395923960239612396223963239642396523966239672396823969239702397123972239732397423975239762397723978239792398023981239822398323984239852398623987239882398923990239912399223993239942399523996239972399823999240002400124002240032400424005240062400724008240092401024011240122401324014240152401624017240182401924020240212402224023240242402524026240272402824029240302403124032240332403424035240362403724038240392404024041240422404324044240452404624047240482404924050240512405224053240542405524056240572405824059240602406124062240632406424065240662406724068240692407024071240722407324074240752407624077240782407924080240812408224083240842408524086240872408824089240902409124092240932409424095240962409724098240992410024101241022410324104241052410624107241082410924110241112411224113241142411524116241172411824119241202412124122241232412424125241262412724128241292413024131241322413324134241352413624137241382413924140241412414224143241442414524146241472414824149241502415124152241532415424155241562415724158241592416024161241622416324164241652416624167241682416924170241712417224173241742417524176241772417824179241802418124182241832418424185241862418724188241892419024191241922419324194241952419624197241982419924200242012420224203242042420524206242072420824209242102421124212242132421424215242162421724218242192422
02422124222242232422424225242262422724228242292423024231242322423324234242352423624237242382423924240242412424224243242442424524246242472424824249242502425124252242532425424255242562425724258242592426024261242622426324264242652426624267242682426924270242712427224273242742427524276242772427824279242802428124282242832428424285242862428724288242892429024291242922429324294242952429624297242982429924300243012430224303243042430524306243072430824309243102431124312243132431424315243162431724318243192432024321243222432324324243252432624327243282432924330243312433224333243342433524336243372433824339243402434124342243432434424345243462434724348243492435024351243522435324354243552435624357243582435924360243612436224363243642436524366243672436824369243702437124372243732437424375243762437724378243792438024381243822438324384243852438624387243882438924390243912439224393243942439524396243972439824399244002440124402244032440424405244062440724408244092441024411244122441324414244152441624417244182441924420244212442224423244242442524426244272442824429244302443124432244332443424435244362443724438244392444024441244422444324444244452444624447244482444924450244512445224453244542445524456244572445824459244602446124462244632446424465244662446724468244692447024471244722447324474244752447624477244782447924480244812448224483244842448524486244872448824489244902449124492244932449424495244962449724498244992450024501245022450324504245052450624507245082450924510245112451224513245142451524516245172451824519245202452124522245232452424525245262452724528245292453024531245322453324534245352453624537245382453924540245412454224543245442454524546245472454824549245502455124552245532455424555245562455724558245592456024561245622456324564245652456624567245682456924570245712457224573245742457524576245772457824579245802458124582245832458424585245862458724588245892459024591245922459324594245952459624597245982459924600246012460224603246042460524606246072460824609246102461124612246132461424615246162461724618246192462
02462124622246232462424625246262462724628246292463024631246322463324634246352463624637246382463924640246412464224643246442464524646246472464824649246502465124652246532465424655246562465724658246592466024661246622466324664246652466624667246682466924670246712467224673246742467524676246772467824679246802468124682246832468424685246862468724688246892469024691246922469324694246952469624697246982469924700247012470224703247042470524706247072470824709247102471124712247132471424715247162471724718247192472024721247222472324724247252472624727247282472924730247312473224733247342473524736247372473824739247402474124742247432474424745247462474724748247492475024751247522475324754247552475624757247582475924760247612476224763247642476524766247672476824769247702477124772247732477424775247762477724778247792478024781247822478324784247852478624787247882478924790247912479224793247942479524796247972479824799248002480124802248032480424805248062480724808248092481024811248122481324814248152481624817248182481924820248212482224823248242482524826248272482824829248302483124832248332483424835248362483724838248392484024841248422484324844248452484624847248482484924850248512485224853248542485524856248572485824859248602486124862248632486424865248662486724868248692487024871248722487324874248752487624877248782487924880248812488224883248842488524886248872488824889248902489124892248932489424895248962489724898248992490024901249022490324904249052490624907249082490924910249112491224913249142491524916249172491824919249202492124922249232492424925249262492724928249292493024931249322493324934
							/*
 * Copyright (c) 2019 Nuclei Limited. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef __CORE_FEATURE_DSP__
#define __CORE_FEATURE_DSP__

/*!
 * @file     core_feature_dsp.h
 * @brief    DSP feature API header file for Nuclei N/NX Core
 */
/*
 * DSP Feature Configuration Macro:
 * 1. __DSP_PRESENT:  Define whether Digital Signal Processing Unit(DSP) is present or not
 *   * 0: Not present
 *   * 1: Present
 */
#ifdef __cplusplus
 extern "C" {
#endif

#include "core_feature_base.h"

#if defined(__DSP_PRESENT) && (__DSP_PRESENT == 1)

#if defined(__INC_INTRINSIC_API) && (__INC_INTRINSIC_API == 1)
#ifndef __ICCRISCV__
#include <rvp_intrinsic.h>
#endif
#endif

#ifndef __ICCRISCV__
/* ###########################  CPU SIMD DSP Intrinsic Functions ########################### */
/**
 * \defgroup NMSIS_Core_DSP_Intrinsic   Intrinsic Functions for SIMD Instructions
 * \ingroup  NMSIS_Core
 * \brief    Functions that generate RISC-V DSP SIMD instructions.
 * \details
 *
 * The following functions generate specified RISC-V SIMD instructions that cannot be directly accessed by compiler.
 * * **DSP ISA Extension Instruction Summary**
 *   + **Shorthand Definitions**
 *     - r.H == r.H1: r[31:16], r.L == r.H0: r[15:0]
 *     - r.B3: r[31:24], r.B2: r[23:16], r.B1: r[15:8], r.B0: r[7:0]
 *     - r.B[x]: r[(x*8+7):(x*8+0)]
 *     - r.H[x]: r[(x*16+15):(x*16+0)]
 *     - r.W[x]: r[(x*32+31):(x*32+0)]
 *     - r[xU]: the upper 32-bit of a 64-bit number; xU represents the GPR number that contains this upper part 32-bit value.
 *     - r[xL]: the lower 32-bit of a 64-bit number; xL represents the GPR number that contains this lower part 32-bit value.
 *     - r[xU].r[xL]: a 64-bit number that is formed from a pair of GPRs.
 *     - s>>: signed arithmetic right shift
 *     - u>>: unsigned logical right shift
 *     - SAT.Qn(): Saturate to the range of [-2^n, 2^n-1], if saturation happens, set PSW.OV.
 *     - SAT.Um(): Saturate to the range of [0, 2^m-1], if saturation happens, set PSW.OV.
 *     - RUND(): Indicate `rounding`, i.e., add 1 to the most significant discarded bit for right shift or MSW-type multiplication instructions.
 *     - Sign or Zero Extending functions:
 *       - SEm(data): Sign-Extend data to m-bit.
 *       - ZEm(data): Zero-Extend data to m-bit.
 *     - ABS(x): Calculate the absolute value of `x`.
 *     - CONCAT(x,y): Concatenate `x` and `y` to form a value.
 *     - u<: Unsigned less than comparison.
 *     - u<=: Unsigned less than or equal comparison.
 *     - u>: Unsigned greater than comparison.
 *     - s*: Signed multiplication.
 *     - u*: Unsigned multiplication.
 *
 *   @{
 */
/** @} */ /* End of Doxygen Group NMSIS_Core_DSP_Intrinsic */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS      SIMD Data Processing Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic
 * \brief    SIMD Data Processing Instructions
 * \details
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB      SIMD 16-bit Add/Subtract Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
 * \brief    SIMD 16-bit Add/Subtract Instructions
 * \details
 * Based on the combination of the types of the two 16-bit arithmetic operations, the SIMD 16-bit
 * add/subtract instructions can be classified into 6 main categories: Addition (two 16-bit addition),
 * Subtraction (two 16-bit subtraction), Crossed Add & Sub (one addition and one subtraction),
 * Crossed Sub & Add (one subtraction and one addition), Straight Add & Sub (one addition and one
 * subtraction), and Straight Sub & Add (one subtraction and one addition).
 * Based on the way of how an overflow condition is handled, the SIMD 16-bit add/subtract
 * instructions can be classified into 5 groups: Wrap-around (dropping overflow), Signed Halving
 * (keeping overflow by dropping 1 LSB bit), Unsigned Halving, Signed Saturation (clipping overflow),
 * and Unsigned Saturation.
 * Together, there are 30 SIMD 16-bit add/subtract instructions.
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB      SIMD 8-bit Addition & Subtraction Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
 * \brief    SIMD 8-bit Addition & Subtraction Instructions
 * \details
 * Based on the types of the four 8-bit arithmetic operations, the SIMD 8-bit add/subtract instructions
 * can be classified into 2 main categories: Addition (four 8-bit addition), and Subtraction (four 8-bit
 * subtraction).
 * Based on the way of how an overflow condition is handled for signed or unsigned operation, the
 * SIMD 8-bit add/subtract instructions can be classified into 5 groups: Wrap-around (dropping
 * overflow), Signed Halving (keeping overflow by dropping 1 LSB bit), Unsigned Halving, Signed
 * Saturation (clipping overflow), and Unsigned Saturation.
 * Together, there are 10 SIMD 8-bit add/subtract instructions.
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT      SIMD 16-bit Shift Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
 * \brief    SIMD 16-bit Shift Instructions
 * \details
 * there are 14 SIMD 16-bit shift instructions.
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT      SIMD 8-bit Shift Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
 * \brief    SIMD 8-bit Shift Instructions
 * \details
 *  there are 14 SIMD 8-bit shift instructions.
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP      SIMD 16-bit Compare Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
 * \brief    SIMD 16-bit Compare Instructions
 * \details
 *  there are 5 SIMD 16-bit Compare instructions.
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP      SIMD 8-bit Compare Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
 * \brief    SIMD 8-bit Compare Instructions
 * \details
 *  there are 5  SIMD 8-bit Compare instructions.
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY      SIMD 16-bit Multiply Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
 * \brief    SIMD 16-bit Multiply Instructions
 * \details
 * there are 6 SIMD 16-bit Multiply instructions.
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY      SIMD 8-bit Multiply Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
 * \brief    SIMD 8-bit Multiply Instructions
 * \details
 *  there are 6 SIMD 8-bit Multiply instructions.
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC      SIMD 16-bit Miscellaneous Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
 * \brief    SIMD 16-bit Miscellaneous Instructions
 * \details
 *  there are 10 SIMD 16-bit Misc instructions.
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC      SIMD 8-bit Miscellaneous Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
 * \brief    SIMD 8-bit Miscellaneous Instructions
 * \details
 *  there are 10 SIMD 8-bit Miscellaneous instructions.
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK      SIMD 8-bit Unpacking Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
 * \brief    SIMD 8-bit Unpacking Instructions
 * \details
 *  there are 8 SIMD 8-bit Unpacking instructions.
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_NON_SIMD      Non-SIMD Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic
 * \brief    Non-SIMD Instructions
 * \details
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU      Non-SIMD Q15 saturation ALU Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NON_SIMD
 * \brief    Non-SIMD Q15 saturation ALU Instructions
 * \details
 * there are 7 Non-SIMD Q15 saturation ALU Instructions
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU      Non-SIMD Q31 saturation ALU Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NON_SIMD
 * \brief    Non-SIMD Q31 saturation ALU Instructions
 * \details
 *  there are Non-SIMD Q31 saturation ALU Instructions
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION      32-bit Computation Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NON_SIMD
 * \brief    32-bit Computation Instructions
 * \details
 * there are 8 32-bit Computation Instructions
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_OV_FLAG_SC      OV (Overflow) flag Set/Clear Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NON_SIMD
 * \brief    OV (Overflow) flag Set/Clear Instructions
 * \details
 * The following table lists the user instructions related to Overflow (OV) flag manipulation.
 * There are 2 OV (Overflow) flag Set/Clear Instructions.
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC      Non-SIMD Miscellaneous Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NON_SIMD
 * \brief    Non-SIMD Miscellaneous Instructions
 * \details
 * There are 13 Miscellaneous Instructions here.
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS      Partial-SIMD Data Processing Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic
 * \brief    Partial-SIMD Data Processing Instructions
 * \details
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_PACK      SIMD 16-bit Packing Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS
 * \brief    SIMD 16-bit Packing Instructions
 * \details
 * there are 4 SIMD 16-bit Packing Instructions.
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC      Signed MSW 32x32 Multiply and Add Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS
 * \brief    Signed MSW 32x32 Multiply and Add Instructions
 * \details
 *  there are 8 Signed MSW 32x32 Multiply and Add Instructions
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC      Signed MSW 32x16 Multiply and Add Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS
 * \brief    Signed MSW 32x16 Multiply and Add Instructions
 * \details
 * there are 15 Signed MSW 32x16 Multiply and Add Instructions
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB      Signed 16-bit Multiply 32-bit Add/Subtract Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS
 * \brief    Signed 16-bit Multiply 32-bit Add/Subtract Instructions
 * \details
 *  there are 18 Signed 16-bit Multiply 32-bit Add/Subtract Instructions
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB      Signed 16-bit Multiply 64-bit Add/Subtract Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS
 * \brief    Signed 16-bit Multiply 64-bit Add/Subtract Instructions
 * \details
 *  there is Signed 16-bit Multiply 64-bit Add/Subtract Instructions
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC      Partial-SIMD Miscellaneous Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS
 * \brief    Partial-SIMD Miscellaneous Instructions
 * \details
 *  there are  7 Partial-SIMD Miscellaneous Instructions
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_8B_MULT_32B_ADD      8-bit Multiply with 32-bit Add Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS
 * \brief    8-bit Multiply with 32-bit Add Instructions
 * \details
 * there are  3 8-bit Multiply with 32-bit Add Instructions
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_64B_PROFILE      64-bit Profile Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic
 * \brief    64-bit Profile Instructions
 * \details
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB      64-bit Addition & Subtraction Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_64B_PROFILE
 * \brief    64-bit Addition & Subtraction Instructions
 * \details
 * there are 10 64-bit Addition & Subtraction Instructions.
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB      32-bit Multiply with 64-bit Add/Subtract Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_64B_PROFILE
 * \brief    32-bit Multiply with 64-bit Add/Subtract Instructions
 * \details
 *  there are 32-bit Multiply 64-bit Add/Subtract Instructions
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB      Signed 16-bit Multiply with 64-bit Add/Subtract Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_64B_PROFILE
 * \brief    Signed 16-bit Multiply with 64-bit Add/Subtract Instructions
 * \details
 * there are 10 Signed 16-bit Multiply with 64-bit Add/Subtract Instructions
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY      RV64 Only Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic
 * \brief    RV64 Only Instructions
 * \details
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB      (RV64 Only) SIMD 32-bit Add/Subtract Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_RV64_ONLY
 * \brief    (RV64 Only) SIMD 32-bit Add/Subtract Instructions
 * \details
 * The following tables list instructions that are only present in RV64.
 * There are 30 SIMD 32-bit addition or subtraction instructions.
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT      (RV64 Only) SIMD 32-bit Shift Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_RV64_ONLY
 * \brief    (RV64 Only) SIMD 32-bit Shift Instructions
 * \details
 *  there are 14 (RV64 Only) SIMD 32-bit Shift Instructions
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC      (RV64 Only) SIMD 32-bit Miscellaneous Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_RV64_ONLY
 * \brief    (RV64 Only) SIMD 32-bit Miscellaneous Instructions
 * \details
 * there are 5  (RV64 Only) SIMD 32-bit Miscellaneous Instructions
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT      (RV64 Only) SIMD Q15 Saturating Multiply Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_RV64_ONLY
 * \brief    (RV64 Only) SIMD Q15 Saturating Multiply Instructions
 * \details
 *  there are 9 (RV64 Only) SIMD Q15 saturating Multiply Instructions
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT      (RV64 Only) 32-bit Multiply Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_RV64_ONLY
 * \brief    (RV64 Only) 32-bit Multiply Instructions
 * \details
 *  there are 3 (RV64 Only) 32-bit Multiply Instructions
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT_ADD      (RV64 Only) 32-bit Multiply & Add Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_RV64_ONLY
 * \brief    (RV64 Only) 32-bit Multiply & Add Instructions
 * \details
 *  there are  3 (RV64 Only) 32-bit Multiply & Add Instructions
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC      (RV64 Only) 32-bit Parallel Multiply & Add Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_RV64_ONLY
 * \brief    (RV64 Only) 32-bit Parallel Multiply & Add Instructions
 * \details
 * there are 12 (RV64 Only) 32-bit Parallel Multiply & Add Instructions
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_NON_SIMD_32B_SHIFT      (RV64 Only) Non-SIMD 32-bit Shift Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_RV64_ONLY
 * \brief    (RV64 Only) Non-SIMD 32-bit Shift Instructions
 * \details
 *  there is 1 (RV64 Only) Non-SIMD 32-bit Shift Instruction
 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PACK      32-bit Packing Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic_RV64_ONLY
 * \brief    32-bit Packing Instructions
 * \details
 *  There are four 32-bit packing instructions here
 */

/* ===== Inline Function Start for 3.1. ADD8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
 * \brief ADD8 (SIMD 8-bit Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * ADD8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit integer element additions simultaneously.
 *
 * **Description**:\n
 * This instruction adds the 8-bit integer elements in Rs1 with the 8-bit integer elements
 * in Rs2, and then writes the 8-bit element results to Rd.
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned addition.
 *
 * **Operations**:\n
 * ~~~
 * Rd.B[x] = Rs1.B[x] + Rs2.B[x];
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_ADD8(unsigned long a, unsigned long b)
{
    unsigned long rd;   /* receives the packed 8-bit element sums */

    /* %0 = rd (write-only), %1 = a (Rs1), %2 = b (Rs2) */
    __ASM volatile("add8 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.1. ADD8 ===== */

/* ===== Inline Function Start for 3.2. ADD16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief ADD16 (SIMD 16-bit Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * ADD16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit integer element additions simultaneously.
 *
 * **Description**:\n
 * This instruction adds the 16-bit integer elements in Rs1 with the 16-bit integer
 * elements in Rs2, and then writes the 16-bit element results to Rd.
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned addition.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = Rs1.H[x] + Rs2.H[x];
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_ADD16(unsigned long a, unsigned long b)
{
    unsigned long rd;   /* receives the packed 16-bit element sums */

    /* %0 = rd (write-only), %1 = a (Rs1), %2 = b (Rs2) */
    __ASM volatile("add16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.2. ADD16 ===== */

/* ===== Inline Function Start for 3.3. ADD64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
 * \brief ADD64 (64-bit Addition)
 * \details
 * **Type**: 64-bit Profile
 *
 * **Syntax**:\n
 * ~~~
 * ADD64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Add two 64-bit signed or unsigned integers.
 *
 * **RV32 Description**:\n
 * This instruction adds the 64-bit integer of an even/odd pair of registers specified
 * by Rs1(4,1) with the 64-bit integer of an even/odd pair of registers specified by Rs2(4,1), and then
 * writes the 64-bit result to an even/odd pair of registers specified by Rd(4,1).
 * Rx(4,1), i.e., value d, determines the even/odd pair group of two registers. Specifically, the register
 * pair includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
 * of the pair contains the low 32-bit of the result.
 *
 * **RV64 Description**:\n
 * This instruction has the same behavior as the ADD instruction in RV64I.
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned addition.
 *
 * **Operations**:\n
 * ~~~
 * RV32:
 *  t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 *  a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
 *  b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
 *  R[t_H].R[t_L] = R[a_H].R[a_L] + R[b_H].R[b_L];
 * RV64:
 *  Rd = Rs1 + Rs2;
 * ~~~
 *
 * \param [in]  a    unsigned long long type of value stored in a
 * \param [in]  b    unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_ADD64(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;  /* 64-bit sum of a and b */

    /* %0 = rd (write-only), %1 = a (Rs1), %2 = b (Rs2) */
    __ASM volatile("add64 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.3. ADD64 ===== */

/* ===== Inline Function Start for 3.4. AVE ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
 * \brief AVE (Average with Rounding)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * AVE Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Calculate the average of the contents of two general registers.
 *
 * **Description**:\n
 * This instruction calculates the average value of two signed integers stored in Rs1 and
 * Rs2, rounds up a half-integer result to the nearest integer, and writes the result to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Sum = CONCAT(Rs1[MSB],Rs1[MSB:0]) + CONCAT(Rs2[MSB],Rs2[MSB:0]) + 1;
 * Rd = Sum[(MSB+1):1];
 * for RV32: MSB=31,
 * for RV64: MSB=63
 * ~~~
 *
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_AVE(long a, long b)
{
    long rd;    /* signed average of a and b, rounded as described above */

    /* %0 = rd (write-only), %1 = a (Rs1), %2 = b (Rs2) */
    __ASM volatile("ave %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.4. AVE ===== */

/* ===== Inline Function Start for 3.5. BITREV ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
 * \brief BITREV (Bit Reverse)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * BITREV Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Reverse the bit positions of the source operand within a specified width starting from bit
 * 0. The reversed width is a variable from a GPR.
 *
 * **Description**:\n
 * This instruction reverses the bit positions of the content of Rs1. The reversed bit width
 * is calculated as Rs2[4:0]+1 (RV32) or Rs2[5:0]+1 (RV64). The upper bits beyond the reversed width
 * are filled with zeros. After the bit reverse operation, the result is written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * msb = Rs2[4:0]; (for RV32)
 * msb = Rs2[5:0]; (for RV64)
 * rev[0:msb] = Rs1[msb:0];
 * Rd = ZE(rev[msb:0]);
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_BITREV(unsigned long a, unsigned long b)
{
    unsigned long rd;   /* bit-reversed low field of a, zero-extended */

    /* %0 = rd (write-only), %1 = a (value to reverse), %2 = b (width - 1) */
    __ASM volatile("bitrev %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.5. BITREV ===== */

/* ===== Inline Function Start for 3.6. BITREVI ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
 * \brief BITREVI (Bit Reverse Immediate)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * (RV32) BITREVI Rd, Rs1, imm[4:0]
 * (RV64) BITREVI Rd, Rs1, imm[5:0]
 * ~~~
 *
 * **Purpose**:\n
 * Reverse the bit positions of the source operand within a specified width starting from bit
 * 0. The reversed width is an immediate value.
 *
 * **Description**:\n
 * This instruction reverses the bit positions of the content of Rs1. The reversed bit width
 * is calculated as imm[4:0]+1 (RV32) or imm[5:0]+1 (RV64). The upper bits beyond the reversed width
 * are filled with zeros. After the bit reverse operation, the result is written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * msb = imm[4:0]; (RV32)
 * msb = imm[5:0]; (RV64)
 * rev[0:msb] = Rs1[msb:0];
 * Rd = ZE(rev[msb:0]);
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    immediate (compile-time constant) giving the reversed bit width minus one
 * \return value stored in unsigned long type
 */
#define __RV_BITREVI(a, b)    \
    ({    \
        /* Evaluate the operand before the result local exists, so an     */    \
        /* argument spelled `result` at the call site still refers to the */    \
        /* caller's variable and not to an uninitialized macro local.     */    \
        unsigned long __a = (unsigned long)(a);    \
        unsigned long __result;    \
        /* `b` is bound with the "K" constraint: it must be an immediate. */    \
        __ASM volatile("bitrevi %0, %1, %2" : "=r"(__result) : "r"(__a), "K"(b));    \
        __result;    \
    })
/* ===== Inline Function End for 3.6. BITREVI ===== */

/* ===== Inline Function Start for 3.7. BPICK ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
 * \brief BPICK (Bit-wise Pick)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * BPICK Rd, Rs1, Rs2, Rc
 * ~~~
 *
 * **Purpose**:\n
 * Select from two source operands based on a bit mask in the third operand.
 *
 * **Description**:\n
 * This instruction selects individual bits from Rs1 or Rs2, based on the bit mask value in
 * Rc. If a bit in Rc is 1, the corresponding bit is from Rs1; otherwise, the corresponding bit is from Rs2.
 * The selection results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd[x] = Rc[x]? Rs1[x] : Rs2[x];
 * for RV32, x=31...0
 * for RV64, x=63...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \param [in]  c    unsigned long type of value stored in c
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_BPICK(unsigned long a, unsigned long b, unsigned long c)
{
    unsigned long rd;   /* per-bit selection between a and b under mask c */

    /* %0 = rd (write-only), %1 = a (Rs1), %2 = b (Rs2), %3 = c (Rc, bit mask) */
    __ASM volatile("bpick %0, %1, %2, %3"
                   : "=r"(rd)
                   : "r"(a), "r"(b), "r"(c));

    return rd;
}
/* ===== Inline Function End for 3.7. BPICK ===== */

/* ===== Inline Function Start for 3.8. CLROV ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_OV_FLAG_SC
 * \brief CLROV (Clear OV flag)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * CLROV # pseudo mnemonic
 * ~~~
 *
 * **Purpose**:\n
 * This pseudo instruction is an alias to `CSRRCI x0, ucode, 1` instruction.
 *
 *
 */
__STATIC_FORCEINLINE void __RV_CLROV(void)
{
    /* Operand-less asm statement: emits the CLROV pseudo mnemonic, which the
     * description above documents as an alias of `CSRRCI x0, ucode, 1`. */
    __ASM volatile("clrov ");
}
/* ===== Inline Function End for 3.8. CLROV ===== */

/* ===== Inline Function Start for 3.9. CLRS8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
 * \brief CLRS8 (SIMD 8-bit Count Leading Redundant Sign)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * CLRS8 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Count the number of redundant sign bits of the 8-bit elements of a general register.
 *
 * **Description**:\n
 * Starting from the bits next to the sign bits of the 8-bit elements of Rs1, this instruction
 * counts the number of redundant sign bits and writes the result to the corresponding 8-bit elements
 * of Rd.
 *
 * **Operations**:\n
 * ~~~
 * snum[x] = Rs1.B[x];
 * cnt[x] = 0;
 * for (i = 6 to 0) {
 *   if (snum[x](i) == snum[x](7)) {
 *     cnt[x] = cnt[x] + 1;
 *   } else {
 *     break;
 *   }
 * }
 * Rd.B[x] = cnt[x];
 * for RV32: x=3...0
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_CLRS8(unsigned long a)
{
    unsigned long rd;   /* per-byte count of redundant sign bits */

    /* %0 = rd (write-only), %1 = a (Rs1) */
    __ASM volatile("clrs8 %0, %1"
                   : "=r"(rd)
                   : "r"(a));

    return rd;
}
/* ===== Inline Function End for 3.9. CLRS8 ===== */

/* ===== Inline Function Start for 3.10. CLRS16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
 * \brief CLRS16 (SIMD 16-bit Count Leading Redundant Sign)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * CLRS16 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Count the number of redundant sign bits of the 16-bit elements of a general register.
 *
 * **Description**:\n
 * Starting from the bits next to the sign bits of the 16-bit elements of Rs1, this
 * instruction counts the number of redundant sign bits and writes the result to the corresponding
 * 16-bit elements of Rd.
 *
 * **Operations**:\n
 * ~~~
 * snum[x] = Rs1.H[x];
 * cnt[x] = 0;
 * for (i = 14 to 0) {
 *   if (snum[x](i) == snum[x](15)) {
 *     cnt[x] = cnt[x] + 1;
 *   } else {
 *     break;
 *   }
 * }
 * Rd.H[x] = cnt[x];
 * for RV32: x=1...0
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_CLRS16(unsigned long a)
{
    unsigned long rd;   /* per-halfword count of redundant sign bits */

    /* %0 = rd (write-only), %1 = a (Rs1) */
    __ASM volatile("clrs16 %0, %1"
                   : "=r"(rd)
                   : "r"(a));

    return rd;
}
/* ===== Inline Function End for 3.10. CLRS16 ===== */

/* ===== Inline Function Start for 3.11. CLRS32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
 * \brief CLRS32 (SIMD 32-bit Count Leading Redundant Sign)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * CLRS32 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Count the number of redundant sign bits of the 32-bit elements of a general register.
 *
 * **Description**:\n
 * Starting from the bits next to the sign bits of the 32-bit elements of Rs1, this
 * instruction counts the number of redundant sign bits and writes the result to the corresponding
 * 32-bit elements of Rd.
 *
 * **Operations**:\n
 * ~~~
 * snum[x] = Rs1.W[x];
 * cnt[x] = 0;
 * for (i = 30 to 0) {
 *   if (snum[x](i) == snum[x](31)) {
 *     cnt[x] = cnt[x] + 1;
 *   } else {
 *     break;
 *   }
 * }
 * Rd.W[x] = cnt[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_CLRS32(unsigned long a)
{
    unsigned long rd;   /* per-word count of redundant sign bits */

    /* %0 = rd (write-only), %1 = a (Rs1) */
    __ASM volatile("clrs32 %0, %1"
                   : "=r"(rd)
                   : "r"(a));

    return rd;
}
/* ===== Inline Function End for 3.11. CLRS32 ===== */

/* ===== Inline Function Start for 3.12. CLO8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
 * \brief CLO8 (SIMD 8-bit Count Leading One)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * CLO8 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Count the number of leading one bits of the 8-bit elements of a general register.
 *
 * **Description**:\n
 * Starting from the most significant bits of the 8-bit elements of Rs1, this instruction
 * counts the number of leading one bits and writes the results to the corresponding 8-bit elements of
 * Rd.
 *
 * **Operations**:\n
 * ~~~
 * snum[x] = Rs1.B[x];
 * cnt[x] = 0;
 * for (i = 7 to 0) {
 *   if (snum[x](i) == 1) {
 *     cnt[x] = cnt[x] + 1;
 *   } else {
 *     break;
 *   }
 * }
 * Rd.B[x] = cnt[x];
 * for RV32: x=3...0
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_CLO8(unsigned long a)
{
    unsigned long rd;   /* per-byte count of leading one bits */

    /* %0 = rd (write-only), %1 = a (Rs1) */
    __ASM volatile("clo8 %0, %1"
                   : "=r"(rd)
                   : "r"(a));

    return rd;
}
/* ===== Inline Function End for 3.12. CLO8 ===== */

/* ===== Inline Function Start for 3.13. CLO16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
 * \brief CLO16 (SIMD 16-bit Count Leading One)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * CLO16 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Count the number of leading one bits of the 16-bit elements of a general register.
 *
 * **Description**:\n
 * Starting from the most significant bits of the 16-bit elements of Rs1, this instruction
 * counts the number of leading one bits and writes the results to the corresponding 16-bit elements
 * of Rd.
 *
 * **Operations**:\n
 * ~~~
 * snum[x] = Rs1.H[x];
 * cnt[x] = 0;
 * for (i = 15 to 0) {
 *   if (snum[x](i) == 1) {
 *     cnt[x] = cnt[x] + 1;
 *   } else {
 *     break;
 *   }
 * }
 * Rd.H[x] = cnt[x];
 * for RV32: x=1...0
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_CLO16(unsigned long a)
{
    unsigned long rd;   /* per-halfword count of leading one bits */

    /* %0 = rd (write-only), %1 = a (Rs1) */
    __ASM volatile("clo16 %0, %1"
                   : "=r"(rd)
                   : "r"(a));

    return rd;
}
/* ===== Inline Function End for 3.13. CLO16 ===== */

/* ===== Inline Function Start for 3.14. CLO32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
 * \brief CLO32 (SIMD 32-bit Count Leading One)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * CLO32 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Count the number of leading one bits of the 32-bit elements of a general register.
 *
 * **Description**:\n
 * Starting from the most significant bits of the 32-bit elements of Rs1, this instruction
 * counts the number of leading one bits and writes the results to the corresponding 32-bit elements
 * of Rd.
 *
 * **Operations**:\n
 * ~~~
 * snum[x] = Rs1.W[x];
 * cnt[x] = 0;
 * for (i = 31 to 0) {
 *   if (snum[x](i) == 1) {
 *     cnt[x] = cnt[x] + 1;
 *   } else {
 *     break;
 *   }
 * }
 * Rd.W[x] = cnt[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_CLO32(unsigned long a)
{
    unsigned long rd;   /* per-word count of leading one bits */

    /* %0 = rd (write-only), %1 = a (Rs1) */
    __ASM volatile("clo32 %0, %1"
                   : "=r"(rd)
                   : "r"(a));

    return rd;
}
/* ===== Inline Function End for 3.14. CLO32 ===== */

/* ===== Inline Function Start for 3.15. CLZ8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
 * \brief CLZ8 (SIMD 8-bit Count Leading Zero)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * CLZ8 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Count the number of leading zero bits of the 8-bit elements of a general register.
 *
 * **Description**:\n
 * Starting from the most significant bits of the 8-bit elements of Rs1, this instruction
 * counts the number of leading zero bits and writes the results to the corresponding 8-bit elements of
 * Rd.
 *
 * **Operations**:\n
 * ~~~
 * snum[x] = Rs1.B[x];
 * cnt[x] = 0;
 * for (i = 7 to 0) {
 *   if (snum[x](i) == 0) {
 *     cnt[x] = cnt[x] + 1;
 *   } else {
 *     break;
 *   }
 * }
 * Rd.B[x] = cnt[x];
 * for RV32: x=3...0
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_CLZ8(unsigned long a)
{
    unsigned long rd;   /* per-byte count of leading zero bits */

    /* %0 = rd (write-only), %1 = a (Rs1) */
    __ASM volatile("clz8 %0, %1"
                   : "=r"(rd)
                   : "r"(a));

    return rd;
}
/* ===== Inline Function End for 3.15. CLZ8 ===== */

/* ===== Inline Function Start for 3.16. CLZ16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
 * \brief CLZ16 (SIMD 16-bit Count Leading Zero)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * CLZ16 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Count the number of leading zero bits of the 16-bit elements of a general register.
 *
 * **Description**:\n
 * Starting from the most significant bits of the 16-bit elements of Rs1, this instruction
 * counts the number of leading zero bits and writes the results to the corresponding 16-bit elements
 * of Rd.
 *
 * **Operations**:\n
 * ~~~
 * snum[x] = Rs1.H[x];
 * cnt[x] = 0;
 * for (i = 15 to 0) {
 *   if (snum[x](i) == 0) {
 *     cnt[x] = cnt[x] + 1;
 *   } else {
 *     break;
 *   }
 * }
 * Rd.H[x] = cnt[x];
 * for RV32: x=1...0
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_CLZ16(unsigned long a)
{
    unsigned long rd;   /* per-halfword count of leading zero bits */

    /* %0 = rd (write-only), %1 = a (Rs1) */
    __ASM volatile("clz16 %0, %1"
                   : "=r"(rd)
                   : "r"(a));

    return rd;
}
/* ===== Inline Function End for 3.16. CLZ16 ===== */

/* ===== Inline Function Start for 3.17. CLZ32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
 * \brief CLZ32 (SIMD 32-bit Count Leading Zero)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * CLZ32 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Count the number of leading zero bits of the 32-bit elements of a general register.
 *
 * **Description**:\n
 * Starting from the most significant bits of the 32-bit elements of Rs1, this instruction
 * counts the number of leading zero bits and writes the results to the corresponding 32-bit elements
 * of Rd.
 *
 * **Operations**:\n
 * ~~~
 * snum[x] = Rs1.W[x];
 * cnt[x] = 0;
 * for (i = 31 to 0) {
 *   if (snum[x](i) == 0) {
 *     cnt[x] = cnt[x] + 1;
 *   } else {
 *     break;
 *   }
 * }
 * Rd.W[x] = cnt[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_CLZ32(unsigned long a)
{
    unsigned long rd;   /* per-word count of leading zero bits */

    /* %0 = rd (write-only), %1 = a (Rs1) */
    __ASM volatile("clz32 %0, %1"
                   : "=r"(rd)
                   : "r"(a));

    return rd;
}
/* ===== Inline Function End for 3.17. CLZ32 ===== */

/* ===== Inline Function Start for 3.18. CMPEQ8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP
 * \brief CMPEQ8 (SIMD 8-bit Integer Compare Equal)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * CMPEQ8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit integer elements equal comparisons simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 8-bit integer elements in Rs1 with the 8-bit integer
 * elements in Rs2 to see if they are equal. If they are equal, the result is 0xFF; otherwise, the result is
 * 0x0. The 8-bit element comparison results are written to Rd.
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned numbers.
 *
 * **Operations**:\n
 * ~~~
 * Rd.B[x] = (Rs1.B[x] == Rs2.B[x])? 0xff : 0x0;
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_CMPEQ8(unsigned long a, unsigned long b)
{
    unsigned long rd;   /* per-byte mask: 0xff where elements match, else 0x0 */

    /* %0 = rd (write-only), %1 = a (Rs1), %2 = b (Rs2) */
    __ASM volatile("cmpeq8 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.18. CMPEQ8 ===== */

/* ===== Inline Function Start for 3.19. CMPEQ16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP
 * \brief CMPEQ16 (SIMD 16-bit Integer Compare Equal)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * CMPEQ16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit integer elements equal comparisons simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 16-bit integer elements in Rs1 with the 16-bit integer
 * elements in Rs2 to see if they are equal. If they are equal, the result is 0xFFFF; otherwise, the result
 * is 0x0. The 16-bit element comparison results are written to Rd.
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned numbers.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = (Rs1.H[x] == Rs2.H[x])? 0xffff : 0x0;
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_CMPEQ16(unsigned long a, unsigned long b)
{
    unsigned long rd;   /* per-halfword mask: 0xffff where elements match, else 0x0 */

    /* %0 = rd (write-only), %1 = a (Rs1), %2 = b (Rs2) */
    __ASM volatile("cmpeq16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.19. CMPEQ16 ===== */

/* ===== Inline Function Start for 3.20. CRAS16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief CRAS16 (SIMD 16-bit Cross Addition & Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * CRAS16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit integer element addition and 16-bit integer element subtraction in a 32-bit
 * chunk simultaneously. Operands are from crossed positions in 32-bit chunks.
 *
 * **Description**:\n
 * This instruction adds the 16-bit integer element in [31:16] of 32-bit chunks in Rs1 with
 * the 16-bit integer element in [15:0] of 32-bit chunks in Rs2, and writes the result to [31:16] of 32-bit
 * chunks in Rd; at the same time, it subtracts the 16-bit integer element in [31:16] of 32-bit chunks in
 * Rs2 from the 16-bit integer element in [15:0] of 32-bit chunks in Rs1, and writes the result to [15:0]
 * of 32-bit chunks in Rd.
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned operations.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:16] = Rs1.W[x][31:16] + Rs2.W[x][15:0];
 * Rd.W[x][15:0] = Rs1.W[x][15:0] - Rs2.W[x][31:16];
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_CRAS16(unsigned long a, unsigned long b)
{
    unsigned long rd;   /* upper halves: cross add; lower halves: cross subtract */

    /* %0 = rd (write-only), %1 = a (Rs1), %2 = b (Rs2) */
    __ASM volatile("cras16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.20. CRAS16 ===== */

/* ===== Inline Function Start for 3.21. CRSA16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief CRSA16 (SIMD 16-bit Cross Subtraction & Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * CRSA16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit integer element subtraction and 16-bit integer element addition in a 32-bit
 * chunk simultaneously. Operands are from crossed positions in 32-bit chunks.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit integer element in [15:0] of 32-bit chunks in Rs2
 * from the 16-bit integer element in [31:16] of 32-bit chunks in Rs1, and writes the result to [31:16] of
 * 32-bit chunks in Rd; at the same time, it adds the 16-bit integer element in [31:16] of 32-bit chunks
 * in Rs2 with the 16-bit integer element in [15:0] of 32-bit chunks in Rs1, and writes the result to
 * [15:0] of 32-bit chunks in Rd.
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned operations.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:16] = Rs1.W[x][31:16] - Rs2.W[x][15:0];
 * Rd.W[x][15:0] = Rs1.W[x][15:0] + Rs2.W[x][31:16];
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_CRSA16(unsigned long a, unsigned long b)
{
    unsigned long rd;   /* upper halves: cross subtract; lower halves: cross add */

    /* %0 = rd (write-only), %1 = a (Rs1), %2 = b (Rs2) */
    __ASM volatile("crsa16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.21. CRSA16 ===== */

/* ===== Inline Function Start for 3.22. INSB ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
 * \brief INSB (Insert Byte)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * (RV32) INSB Rd, Rs1, imm[1:0]
 * (RV64) INSB Rd, Rs1, imm[2:0]
 * ~~~
 *
 * **Purpose**:\n
 * Insert byte 0 of a 32-bit or 64-bit register into one of the byte elements of another register.
 *
 * **Description**:\n
 * This instruction inserts byte 0 of Rs1 into byte `imm[1:0]` (RV32) or `imm[2:0]` (RV64)
 * of Rd.
 *
 * **Operations**:\n
 * ~~~
 * bpos = imm[1:0]; (RV32)
 * bpos = imm[2:0]; (RV64)
 * Rd.B[bpos] = Rs1.B[0]
 * ~~~
 *
 * \param [in]  t    unsigned long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    immediate (compile-time constant) selecting the byte position in t to overwrite
 * \return value stored in unsigned long type
 */
#define __RV_INSB(t, a, b)    \
    ({    \
        unsigned long __t = (unsigned long)(t);    \
        unsigned long __a = (unsigned long)(a);    \
        __ASM volatile("insb %0, %1, %2" : "+r"(__t) : "r"(__a), "K"(b));    \
        __t;    \
    })
/* ===== Inline Function End for 3.22. INSB ===== */

/* ===== Inline Function Start for 3.23. KABS8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
 * \brief KABS8 (SIMD 8-bit Saturating Absolute)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KABS8 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Get the absolute value of 8-bit signed integer elements simultaneously.
 *
 * **Description**:\n
 * This instruction calculates the absolute value of 8-bit signed integer elements stored
 * in Rs1 and writes the element results to Rd. If the input number is 0x80, this instruction generates
 * 0x7f as the output and sets the OV bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * src = Rs1.B[x];
 * if (src == 0x80) {
 *   src = 0x7f;
 *   OV = 1;
 * } else if (src[7] == 1) {
 *   src = -src;
 * }
 * Rd.B[x] = src;
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KABS8(unsigned long a)
{
    unsigned long rd;   /* written by kabs8 (operand %0) */

    /* a is passed as Rs1 (%1); volatile keeps the statement even if rd is unused */
    __ASM volatile("kabs8 %0, %1"
                   : "=r"(rd)
                   : "r"(a));
    return rd;
}
/* ===== Inline Function End for 3.23. KABS8 ===== */

/* ===== Inline Function Start for 3.24. KABS16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
 * \brief KABS16 (SIMD 16-bit Saturating Absolute)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KABS16 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Get the absolute value of 16-bit signed integer elements simultaneously.
 *
 * **Description**:\n
 * This instruction calculates the absolute value of 16-bit signed integer elements stored
 * in Rs1 and writes the element results to Rd. If the input number is 0x8000, this instruction
 * generates 0x7fff as the output and sets the OV bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * src = Rs1.H[x];
 * if (src == 0x8000) {
 *   src = 0x7fff;
 *   OV = 1;
 * } else if (src[15] == 1) {
 *   src = -src;
 * }
 * Rd.H[x] = src;
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KABS16(unsigned long a)
{
    unsigned long rd;   /* written by kabs16 (operand %0) */

    /* a is passed as Rs1 (%1); volatile keeps the statement even if rd is unused */
    __ASM volatile("kabs16 %0, %1"
                   : "=r"(rd)
                   : "r"(a));
    return rd;
}
/* ===== Inline Function End for 3.24. KABS16 ===== */

/* ===== Inline Function Start for 3.25. KABSW ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
 * \brief KABSW (Scalar 32-bit Absolute Value with Saturation)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * KABSW Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Get the absolute value of a signed 32-bit integer in a general register.
 *
 * **Description**:\n
 * This instruction calculates the absolute value of a signed 32-bit integer stored in Rs1.
 * The result is sign-extended (for RV64) and written to Rd. This instruction with the minimum
 * negative integer input of 0x80000000 will produce a saturated output of maximum positive integer
 * of 0x7fffffff and the OV flag will be set to 1.
 *
 * **Operations**:\n
 * ~~~
 * if (Rs1.W[0] >= 0) {
 *   res = Rs1.W[0];
 * } else {
 *   If (Rs1.W[0] == 0x80000000) {
 *     res = 0x7fffffff;
 *     OV = 1;
 *   } else {
 *     res = -Rs1.W[0];
 *   }
 * }
 * Rd = SE32(res);
 * ~~~
 *
 * \param [in]  a    signed long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KABSW(signed long a)
{
    unsigned long rd;   /* written by kabsw (operand %0) */

    /* the signed input a is passed as Rs1 (%1); the result is returned as unsigned long */
    __ASM volatile("kabsw %0, %1"
                   : "=r"(rd)
                   : "r"(a));
    return rd;
}
/* ===== Inline Function End for 3.25. KABSW ===== */

/* ===== Inline Function Start for 3.26. KADD8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
 * \brief KADD8 (SIMD 8-bit Signed Saturating Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KADD8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit signed integer element saturating additions simultaneously.
 *
 * **Description**:\n
 * This instruction adds the 8-bit signed integer elements in Rs1 with the 8-bit signed
 * integer elements in Rs2. If any of the results are beyond the Q7 number range (-2^7 <= Q7 <= 2^7-1), they
 * are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rs1.B[x] + Rs2.B[x];
 * if (res[x] > 127) {
 *   res[x] = 127;
 *   OV = 1;
 * } else if (res[x] < -128) {
 *   res[x] = -128;
 *   OV = 1;
 * }
 * Rd.B[x] = res[x];
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KADD8(unsigned long a, unsigned long b)
{
    unsigned long rd;   /* written by kadd8 (operand %0) */

    /* a is passed as Rs1 (%1), b as Rs2 (%2) */
    __ASM volatile("kadd8 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.26. KADD8 ===== */

/* ===== Inline Function Start for 3.27. KADD16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief KADD16 (SIMD 16-bit Signed Saturating Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KADD16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element saturating additions simultaneously.
 *
 * **Description**:\n
 * This instruction adds the 16-bit signed integer elements in Rs1 with the 16-bit signed
 * integer elements in Rs2. If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= 2^15-1),
 * they are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rs1.H[x] + Rs2.H[x];
 * if (res[x] > 32767) {
 *   res[x] = 32767;
 *   OV = 1;
 * } else if (res[x] < -32768) {
 *   res[x] = -32768;
 *   OV = 1;
 * }
 * Rd.H[x] = res[x];
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KADD16(unsigned long a, unsigned long b)
{
    unsigned long rd;   /* written by kadd16 (operand %0) */

    /* a is passed as Rs1 (%1), b as Rs2 (%2) */
    __ASM volatile("kadd16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.27. KADD16 ===== */

/* ===== Inline Function Start for 3.28. KADD64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
 * \brief KADD64 (64-bit Signed Saturating Addition)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * KADD64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Add two 64-bit signed integers. The result is saturated to the Q63 range.
 *
 * **RV32 Description**:\n
 * This instruction adds the 64-bit signed integer of an even/odd pair of registers
 * specified by Rs1(4,1) with the 64-bit signed integer of an even/odd pair of registers specified by
 * Rs2(4,1). If the 64-bit result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the
 * range and the OV bit is set to 1. The saturated result is written to an even/odd pair of registers
 * specified by Rd(4,1).
 * Rx(4,1), i.e., value d, determines the even/odd pair group of two registers. Specifically, the register
 * pair includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
 * of the pair contains the low 32-bit of the result.
 *
 * **RV64 Description**:\n
 * This instruction adds the 64-bit signed integer in Rs1 with the 64-bit signed
 * integer in Rs2. If the result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the
 * range and the OV bit is set to 1. The saturated result is written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * RV32:
 *  t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 *  a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
 *  b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
 *  result = R[a_H].R[a_L] + R[b_H].R[b_L];
 *  if (result > (2^63)-1) {
 *    result = (2^63)-1; OV = 1;
 *  } else if (result < -2^63) {
 *    result = -2^63; OV = 1;
 *  }
 *  R[t_H].R[t_L] = result;
 * RV64:
 *  result = Rs1 + Rs2;
 *  if (result > (2^63)-1) {
 *    result = (2^63)-1; OV = 1;
 *  } else if (result < -2^63) {
 *    result = -2^63; OV = 1;
 *  }
 *  Rd = result;
 * ~~~
 *
 * \param [in]  a    long long type of value stored in a
 * \param [in]  b    long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_KADD64(long long a, long long b)
{
    long long sum;  /* 64-bit destination operand of kadd64 (%0) */

    /* both 64-bit inputs are handed to the instruction through plain "r" constraints */
    __ASM volatile("kadd64 %0, %1, %2"
                   : "=r"(sum)
                   : "r"(a), "r"(b));
    return sum;
}
/* ===== Inline Function End for 3.28. KADD64 ===== */

/* ===== Inline Function Start for 3.29. KADDH ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
 * \brief KADDH (Signed Addition with Q15 Saturation)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * KADDH Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Add the signed lower 32-bit content of two registers with Q15 saturation.
 *
 * **Description**:\n
 * The signed lower 32-bit content of Rs1 is added with the signed lower 32-bit content of
 * Rs2. And the result is saturated to the 16-bit signed integer range of [-2^15, 2^15-1] and then sign-
 * extended and written to Rd. If saturation happens, this instruction sets the OV flag.
 *
 * **Operations**:\n
 * ~~~
 * tmp = Rs1.W[0] + Rs2.W[0];
 * if (tmp > 32767) {
 *   res = 32767;
 *   OV = 1;
 * } else if (tmp < -32768) {
 *   res = -32768;
 *   OV = 1;
 * } else {
 *   res = tmp;
 * }
 * Rd = SE(res[15:0]);
 * ~~~
 *
 * \param [in]  a    int type of value stored in a
 * \param [in]  b    int type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KADDH(int a, int b)
{
    long rd;    /* written by kaddh (operand %0) */

    /* a is passed as Rs1 (%1), b as Rs2 (%2) */
    __ASM volatile("kaddh %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.29. KADDH ===== */

/* ===== Inline Function Start for 3.30. KADDW ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
 * \brief KADDW (Signed Addition with Q31 Saturation)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * KADDW Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Add the lower 32-bit signed content of two registers with Q31 saturation.
 *
 * **Description**:\n
 * The lower 32-bit signed content of Rs1 is added with the lower 32-bit signed content of
 * Rs2. And the result is saturated to the 32-bit signed integer range of [-2^31, 2^31-1] and then sign-
 * extended and written to Rd. If saturation happens, this instruction sets the OV flag.
 *
 * **Operations**:\n
 * ~~~
 * tmp = Rs1.W[0] + Rs2.W[0];
 * if (tmp > (2^31)-1) {
 *   res = (2^31)-1;
 *   OV = 1;
 * } else if (tmp < -2^31) {
 *   res = -2^31;
 *   OV = 1;
 * } else {
 *   res = tmp;
 * }
 * Rd = res[31:0]; // RV32
 * Rd = SE(res[31:0]); // RV64
 * ~~~
 *
 * \param [in]  a    int type of value stored in a
 * \param [in]  b    int type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KADDW(int a, int b)
{
    long rd;    /* written by kaddw (operand %0) */

    /* a is passed as Rs1 (%1), b as Rs2 (%2) */
    __ASM volatile("kaddw %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.30. KADDW ===== */

/* ===== Inline Function Start for 3.31. KCRAS16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief KCRAS16 (SIMD 16-bit Signed Saturating Cross Addition & Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KCRAS16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element saturating addition and 16-bit signed integer element
 * saturating subtraction in a 32-bit chunk simultaneously. Operands are from crossed positions in 32-
 * bit chunks.
 *
 * **Description**:\n
 * This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in
 * Rs1 with the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs2; at the same time, it
 * subtracts the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs2 from the 16-bit signed
 * integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the Q15 number
 * range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated
 * results are written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit chunks in Rd for
 * subtraction.
 *
 * **Operations**:\n
 * ~~~
 * res1 = Rs1.W[x][31:16] + Rs2.W[x][15:0];
 * res2 = Rs1.W[x][15:0] - Rs2.W[x][31:16];
 * for (res in [res1, res2]) {
 *   if (res > (2^15)-1) {
 *     res = (2^15)-1;
 *     OV = 1;
 *   } else if (res < -2^15) {
 *     res = -2^15;
 *     OV = 1;
 *   }
 * }
 * Rd.W[x][31:16] = res1;
 * Rd.W[x][15:0] = res2;
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KCRAS16(unsigned long a, unsigned long b)
{
    unsigned long rd;   /* written by kcras16 (operand %0) */

    /* a is passed as Rs1 (%1), b as Rs2 (%2) */
    __ASM volatile("kcras16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.31. KCRAS16 ===== */

/* ===== Inline Function Start for 3.32. KCRSA16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief KCRSA16 (SIMD 16-bit Signed Saturating Cross Subtraction & Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KCRSA16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element saturating subtraction and 16-bit signed integer element
 * saturating addition in a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit
 * chunks.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit signed integer element in [15:0] of 32-bit chunks
 * in Rs2 from the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1; at the same time, it
 * adds the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs2 with the 16-bit signed
 * integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the Q15 number
 * range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated
 * results are written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of 32-bit chunks in Rd
 * for addition.
 *
 * **Operations**:\n
 * ~~~
 * res1 = Rs1.W[x][31:16] - Rs2.W[x][15:0];
 * res2 = Rs1.W[x][15:0] + Rs2.W[x][31:16];
 * for (res in [res1, res2]) {
 *   if (res > (2^15)-1) {
 *     res = (2^15)-1;
 *     OV = 1;
 *   } else if (res < -2^15) {
 *     res = -2^15;
 *     OV = 1;
 *   }
 * }
 * Rd.W[x][31:16] = res1;
 * Rd.W[x][15:0] = res2;
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KCRSA16(unsigned long a, unsigned long b)
{
    unsigned long rd;   /* written by kcrsa16 (operand %0) */

    /* a is passed as Rs1 (%1), b as Rs2 (%2) */
    __ASM volatile("kcrsa16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.32. KCRSA16 ===== */

/* ===== Inline Function Start for 3.33.1. KDMBB ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
 * \brief KDMBB (Signed Saturating Double Multiply B16 x B16)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * KDMxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
 * of the lower 32-bit chunk in registers and then double and saturate the Q31 result. The result is
 * written into the destination register for RV32 or sign-extended to 64-bits and written into the
 * destination register for RV64. If saturation happens, an overflow flag OV will be set.
 *
 * **Description**:\n
 * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
 * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then
 * doubled and saturated into a Q31 value. The Q31 value is then written into Rd (sign-extended in
 * RV64). When both the two Q15 inputs are 0x8000, saturation will happen. The result will be
 * saturated to 0x7FFFFFFF and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMBB
 * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMBT
 * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMTT
 * If (0x8000 != aop | 0x8000 != bop) {
 *   Mresult = aop * bop;
 *   resQ31 = Mresult << 1;
 *   Rd = resQ31; // RV32
 *   Rd = SE(resQ31); // RV64
 * } else {
 *   resQ31 = 0x7FFFFFFF;
 *   Rd = resQ31; // RV32
 *   Rd = SE(resQ31); // RV64
 *   OV = 1;
 * }
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KDMBB(unsigned int a, unsigned int b)
{
    long rd;    /* written by kdmbb (operand %0) */

    /* a is passed as Rs1 (%1), b as Rs2 (%2) */
    __ASM volatile("kdmbb %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.33.1. KDMBB ===== */

/* ===== Inline Function Start for 3.33.2. KDMBT ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
 * \brief KDMBT (Signed Saturating Double Multiply B16 x T16)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * KDMxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
 * of the lower 32-bit chunk in registers and then double and saturate the Q31 result. The result is
 * written into the destination register for RV32 or sign-extended to 64-bits and written into the
 * destination register for RV64. If saturation happens, an overflow flag OV will be set.
 *
 * **Description**:\n
 * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
 * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then
 * doubled and saturated into a Q31 value. The Q31 value is then written into Rd (sign-extended in
 * RV64). When both the two Q15 inputs are 0x8000, saturation will happen. The result will be
 * saturated to 0x7FFFFFFF and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMBB
 * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMBT
 * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMTT
 * If (0x8000 != aop | 0x8000 != bop) {
 *   Mresult = aop * bop;
 *   resQ31 = Mresult << 1;
 *   Rd = resQ31; // RV32
 *   Rd = SE(resQ31); // RV64
 * } else {
 *   resQ31 = 0x7FFFFFFF;
 *   Rd = resQ31; // RV32
 *   Rd = SE(resQ31); // RV64
 *   OV = 1;
 * }
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KDMBT(unsigned int a, unsigned int b)
{
    long rd;    /* written by kdmbt (operand %0) */

    /* a is passed as Rs1 (%1), b as Rs2 (%2) */
    __ASM volatile("kdmbt %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.33.2. KDMBT ===== */

/* ===== Inline Function Start for 3.33.3. KDMTT ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
 * \brief KDMTT (Signed Saturating Double Multiply T16 x T16)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * KDMxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
 * of the lower 32-bit chunk in registers and then double and saturate the Q31 result. The result is
 * written into the destination register for RV32 or sign-extended to 64-bits and written into the
 * destination register for RV64. If saturation happens, an overflow flag OV will be set.
 *
 * **Description**:\n
 * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
 * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then
 * doubled and saturated into a Q31 value. The Q31 value is then written into Rd (sign-extended in
 * RV64). When both the two Q15 inputs are 0x8000, saturation will happen. The result will be
 * saturated to 0x7FFFFFFF and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMBB
 * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMBT
 * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMTT
 * If (0x8000 != aop | 0x8000 != bop) {
 *   Mresult = aop * bop;
 *   resQ31 = Mresult << 1;
 *   Rd = resQ31; // RV32
 *   Rd = SE(resQ31); // RV64
 * } else {
 *   resQ31 = 0x7FFFFFFF;
 *   Rd = resQ31; // RV32
 *   Rd = SE(resQ31); // RV64
 *   OV = 1;
 * }
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KDMTT(unsigned int a, unsigned int b)
{
    long rd;    /* written by kdmtt (operand %0) */

    /* a is passed as Rs1 (%1), b as Rs2 (%2) */
    __ASM volatile("kdmtt %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.33.3. KDMTT ===== */

/* ===== Inline Function Start for 3.34.1. KDMABB ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
 * \brief KDMABB (Signed Saturating Double Multiply Addition B16 x B16)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * KDMAxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
 * of the lower 32-bit chunk in registers and then double and saturate the Q31 result, add the result
 * with the sign-extended lower 32-bit chunk destination register and write the saturated addition
 * result into the destination register. If saturation happens, an overflow flag OV will be set.
 *
 * **Description**:\n
 * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
 * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then
 * doubled and saturated into a Q31 value. The Q31 value is then added with the content of Rd. If the
 * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and
 * the OV flag is set to 1. The result after saturation is written to Rd.
 * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be
 * set.
 *
 * **Operations**:\n
 * ~~~
 * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMABB
 * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMABT
 * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMATT
 * If (0x8000 != aop | 0x8000 != bop) {
 *   Mresult = aop * bop;
 *   resQ31 = Mresult << 1;
 * } else {
 *   resQ31 = 0x7FFFFFFF;
 *   OV = 1;
 * }
 * resadd = Rd + resQ31; // RV32
 * resadd = Rd.W[0] + resQ31; // RV64
 * if (resadd > (2^31)-1) {
 *   resadd = (2^31)-1;
 *   OV = 1;
 * } else if (resadd < -2^31) {
 *   resadd = -2^31;
 *   OV = 1;
 * }
 * Rd = resadd; // RV32
 * Rd = SE(resadd); // RV64
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KDMABB(long t, unsigned int a, unsigned int b)
{
    long acc = t;   /* Rd is read-modify-write ("+r"): starts as t, ends as the result */

    /* a is passed as Rs1 (%1), b as Rs2 (%2) */
    __ASM volatile("kdmabb %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.34.1. KDMABB ===== */

/* ===== Inline Function Start for 3.34.2. KDMABT ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
 * \brief KDMABT (Signed Saturating Double Multiply Addition B16 x T16)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * KDMAxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
 * of the lower 32-bit chunk in registers and then double and saturate the Q31 result, add the result
 * with the sign-extended lower 32-bit chunk destination register and write the saturated addition
 * result into the destination register. If saturation happens, an overflow flag OV will be set.
 *
 * **Description**:\n
 * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
 * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then
 * doubled and saturated into a Q31 value. The Q31 value is then added with the content of Rd. If the
 * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and
 * the OV flag is set to 1. The result after saturation is written to Rd.
 * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be
 * set.
 *
 * **Operations**:\n
 * ~~~
 * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMABB
 * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMABT
 * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMATT
 * If (0x8000 != aop | 0x8000 != bop) {
 *   Mresult = aop * bop;
 *   resQ31 = Mresult << 1;
 * } else {
 *   resQ31 = 0x7FFFFFFF;
 *   OV = 1;
 * }
 * resadd = Rd + resQ31; // RV32
 * resadd = Rd.W[0] + resQ31; // RV64
 * if (resadd > (2^31)-1) {
 *   resadd = (2^31)-1;
 *   OV = 1;
 * } else if (resadd < -2^31) {
 *   resadd = -2^31;
 *   OV = 1;
 * }
 * Rd = resadd; // RV32
 * Rd = SE(resadd); // RV64
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KDMABT(long t, unsigned int a, unsigned int b)
{
    long acc = t;   /* Rd is read-modify-write ("+r"): starts as t, ends as the result */

    /* a is passed as Rs1 (%1), b as Rs2 (%2) */
    __ASM volatile("kdmabt %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.34.2. KDMABT ===== */

/* ===== Inline Function Start for 3.34.3. KDMATT ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
 * \brief KDMATT (Signed Saturating Double Multiply Addition T16 x T16)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * KDMAxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
 * of the lower 32-bit chunk in registers and then double and saturate the Q31 result, add the result
 * with the sign-extended lower 32-bit chunk destination register and write the saturated addition
 * result into the destination register. If saturation happens, an overflow flag OV will be set.
 *
 * **Description**:\n
 * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
 * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then
 * doubled and saturated into a Q31 value. The Q31 value is then added with the content of Rd. If the
 * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and
 * the OV flag is set to 1. The result after saturation is written to Rd.
 * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be
 * set.
 *
 * **Operations**:\n
 * ~~~
 * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMABB
 * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMABT
 * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMATT
 * If (0x8000 != aop | 0x8000 != bop) {
 *   Mresult = aop * bop;
 *   resQ31 = Mresult << 1;
 * } else {
 *   resQ31 = 0x7FFFFFFF;
 *   OV = 1;
 * }
 * resadd = Rd + resQ31; // RV32
 * resadd = Rd.W[0] + resQ31; // RV64
 * if (resadd > (2^31)-1) {
 *   resadd = (2^31)-1;
 *   OV = 1;
 * } else if (resadd < -2^31) {
 *   resadd = -2^31;
 *   OV = 1;
 * }
 * Rd = resadd; // RV32
 * Rd = SE(resadd); // RV64
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KDMATT(long t, unsigned int a, unsigned int b)
{
    long acc = t;   /* Rd is read-modify-write ("+r"): starts as t, ends as the result */

    /* a is passed as Rs1 (%1), b as Rs2 (%2) */
    __ASM volatile("kdmatt %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.34.3. KDMATT ===== */

/* ===== Inline Function Start for 3.35.1. KHM8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY
 * \brief KHM8 (SIMD Signed Saturating Q7 Multiply)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KHM8 Rd, Rs1, Rs2
 * KHMX8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do Q7xQ7 element multiplications simultaneously. The Q14 results are then reduced to Q7
 * numbers again.
 *
 * **Description**:\n
 * For the `KHM8` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1
 * with the top 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7
 * content of 16-bit chunks in Rs1 with the bottom 8-bit Q7 content of 16-bit chunks in Rs2.
 * For the `KHMX8` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1 with the
 * bottom 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7
 * content of 16-bit chunks in Rs1 with the top 8-bit Q7 content of 16-bit chunks in Rs2.
 * The Q14 results are then right-shifted 7-bits and saturated into Q7 values. The Q7 results are then
 * written into Rd. When both the two Q7 inputs of a multiplication are 0x80, saturation will happen.
 * The result will be saturated to 0x7F and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * if (is `KHM8`) {
 *   op1t = Rs1.B[x+1]; op2t = Rs2.B[x+1]; // top
 *   op1b = Rs1.B[x]; op2b = Rs2.B[x]; // bottom
 * } else if (is `KHMX8`) {
 *   op1t = Rs1.B[x+1]; op2t = Rs2.B[x]; // Rs1 top
 *   op1b = Rs1.B[x]; op2b = Rs2.B[x+1]; // Rs1 bottom
 * }
 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
 *   if (0x80 != aop | 0x80 != bop) {
 *     res = (aop s* bop) >> 7;
 *   } else {
 *     res= 0x7F;
 *     OV = 1;
 *   }
 * }
 * Rd.H[x/2] = concat(rest, resb);
 * for RV32, x=0,2
 * for RV64, x=0,2,4,6
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KHM8(unsigned long a, unsigned long b)
{
    unsigned long rd;   /* written by khm8 (operand %0) */

    /* a is passed as Rs1 (%1), b as Rs2 (%2) */
    __ASM volatile("khm8 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.35.1. KHM8 ===== */

/* ===== Inline Function Start for 3.35.2. KHMX8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY
 * \brief KHMX8 (SIMD Signed Saturating Crossed Q7 Multiply)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KHM8 Rd, Rs1, Rs2
 * KHMX8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do Q7xQ7 element multiplications simultaneously. The Q14 results are then reduced to Q7
 * numbers again.
 *
 * **Description**:\n
 * For the `KHM8` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1
 * with the top 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7
 * content of 16-bit chunks in Rs1 with the bottom 8-bit Q7 content of 16-bit chunks in Rs2.
 * For the `KHMX8` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1 with the
 * bottom 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7
 * content of 16-bit chunks in Rs1 with the top 8-bit Q7 content of 16-bit chunks in Rs2.
 * The Q14 results are then right-shifted 7-bits and saturated into Q7 values. The Q7 results are then
 * written into Rd. When both the two Q7 inputs of a multiplication are 0x80, saturation will happen.
 * The result will be saturated to 0x7F and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * if (is `KHM8`) {
 *   op1t = Rs1.B[x+1]; op2t = Rs2.B[x+1]; // top
 *   op1b = Rs1.B[x]; op2b = Rs2.B[x]; // bottom
 * } else if (is `KHMX8`) {
 *   op1t = Rs1.B[x+1]; op2t = Rs2.B[x]; // Rs1 top
 *   op1b = Rs1.B[x]; op2b = Rs2.B[x+1]; // Rs1 bottom
 * }
 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
 *   if (0x80 != aop | 0x80 != bop) {
 *     res = (aop s* bop) >> 7;
 *   } else {
 *     res= 0x7F;
 *     OV = 1;
 *   }
 * }
 * Rd.H[x/2] = concat(rest, resb);
 * for RV32, x=0,2
 * for RV64, x=0,2,4,6
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KHMX8(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* One `khmx8` instruction: %0 -> rd (output), %1 -> a (Rs1), %2 -> b (Rs2). */
    __ASM volatile("khmx8 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.35.2. KHMX8 ===== */

/* ===== Inline Function Start for 3.36.1. KHM16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY
 * \brief KHM16 (SIMD Signed Saturating Q15 Multiply)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KHM16 Rd, Rs1, Rs2
 * KHMX16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do Q15xQ15 element multiplications simultaneously. The Q30 results are then reduced to
 * Q15 numbers again.
 *
 * **Description**:\n
 * For the `KHM16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in
 * Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom
 * 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content of 32-bit chunks in
 * Rs2.
 * For the `KHMX16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the
 * bottom 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom 16-bit Q15
 * content of 32-bit chunks in Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2.
 * The Q30 results are then right-shifted 15-bits and saturated into Q15 values. The Q15 results are
 * then written into Rd. When both the two Q15 inputs of a multiplication are 0x8000, saturation will
 * happen. The result will be saturated to 0x7FFF and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * if (is `KHM16`) {
 *   op1t = Rs1.H[x+1]; op2t = Rs2.H[x+1]; // top
 *   op1b = Rs1.H[x]; op2b = Rs2.H[x]; // bottom
 * } else if (is `KHMX16`) {
 *   op1t = Rs1.H[x+1]; op2t = Rs2.H[x]; // Rs1 top
 *   op1b = Rs1.H[x]; op2b = Rs2.H[x+1]; // Rs1 bottom
 * }
 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
 *   if (0x8000 != aop | 0x8000 != bop) {
 *     res = (aop s* bop) >> 15;
 *   } else {
 *     res= 0x7FFF;
 *     OV = 1;
 *   }
 * }
 * Rd.W[x/2] = concat(rest, resb);
 * for RV32: x=0
 * for RV64: x=0,2
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KHM16(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* One `khm16` instruction: %0 -> rd (output), %1 -> a (Rs1), %2 -> b (Rs2). */
    __ASM volatile("khm16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.36.1. KHM16 ===== */

/* ===== Inline Function Start for 3.36.2. KHMX16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY
 * \brief KHMX16 (SIMD Signed Saturating Crossed Q15 Multiply)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KHM16 Rd, Rs1, Rs2
 * KHMX16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do Q15xQ15 element multiplications simultaneously. The Q30 results are then reduced to
 * Q15 numbers again.
 *
 * **Description**:\n
 * For the `KHM16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in
 * Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom
 * 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content of 32-bit chunks in
 * Rs2.
 * For the `KHMX16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the
 * bottom 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom 16-bit Q15
 * content of 32-bit chunks in Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2.
 * The Q30 results are then right-shifted 15-bits and saturated into Q15 values. The Q15 results are
 * then written into Rd. When both the two Q15 inputs of a multiplication are 0x8000, saturation will
 * happen. The result will be saturated to 0x7FFF and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * if (is `KHM16`) {
 *   op1t = Rs1.H[x+1]; op2t = Rs2.H[x+1]; // top
 *   op1b = Rs1.H[x]; op2b = Rs2.H[x]; // bottom
 * } else if (is `KHMX16`) {
 *   op1t = Rs1.H[x+1]; op2t = Rs2.H[x]; // Rs1 top
 *   op1b = Rs1.H[x]; op2b = Rs2.H[x+1]; // Rs1 bottom
 * }
 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
 *   if (0x8000 != aop | 0x8000 != bop) {
 *     res = (aop s* bop) >> 15;
 *   } else {
 *     res= 0x7FFF;
 *     OV = 1;
 *   }
 * }
 * Rd.W[x/2] = concat(rest, resb);
 * for RV32: x=0
 * for RV64: x=0,2
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KHMX16(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* One `khmx16` instruction: %0 -> rd (output), %1 -> a (Rs1), %2 -> b (Rs2). */
    __ASM volatile("khmx16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.36.2. KHMX16 ===== */

/* ===== Inline Function Start for 3.37.1. KHMBB ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
 * \brief KHMBB (Signed Saturating Half Multiply B16 x B16)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * KHMxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed Q15 number contents of two 16-bit data in the corresponding portion
 * of the lower 32-bit chunk in registers and then right-shift 15 bits to turn the Q30 result into a Q15
 * number again and saturate the Q15 result into the destination register. If saturation happens, an
 * overflow flag OV will be set.
 *
 * **Description**:\n
 * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
 * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then right-
 * shifted 15-bits and saturated into a Q15 value. The Q15 value is then sign-extended and written into
 * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated
 * to 0x7FFF and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * aop = Rs1.H[0]; bop = Rs2.H[0]; // KHMBB
 * aop = Rs1.H[0]; bop = Rs2.H[1]; // KHMBT
 * aop = Rs1.H[1]; bop = Rs2.H[1]; // KHMTT
 * If (0x8000 != aop | 0x8000 != bop) {
 *   Mresult[31:0] = aop * bop;
 *   res[15:0] = Mresult[30:15];
 * } else {
 *   res[15:0] = 0x7FFF;
 *   OV = 1;
 * }
 * Rd = SE32(res[15:0]); // RV32
 * Rd = SE64(res[15:0]); // RV64
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KHMBB(unsigned int a, unsigned int b)
{
    long rd;

    /* One `khmbb` instruction: %0 -> rd (output), %1 -> a (Rs1), %2 -> b (Rs2). */
    __ASM volatile("khmbb %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.37.1. KHMBB ===== */

/* ===== Inline Function Start for 3.37.2. KHMBT ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
 * \brief KHMBT (Signed Saturating Half Multiply B16 x T16)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * KHMxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed Q15 number contents of two 16-bit data in the corresponding portion
 * of the lower 32-bit chunk in registers and then right-shift 15 bits to turn the Q30 result into a Q15
 * number again and saturate the Q15 result into the destination register. If saturation happens, an
 * overflow flag OV will be set.
 *
 * **Description**:\n
 * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
 * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then right-
 * shifted 15-bits and saturated into a Q15 value. The Q15 value is then sign-extended and written into
 * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated
 * to 0x7FFF and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * aop = Rs1.H[0]; bop = Rs2.H[0]; // KHMBB
 * aop = Rs1.H[0]; bop = Rs2.H[1]; // KHMBT
 * aop = Rs1.H[1]; bop = Rs2.H[1]; // KHMTT
 * If (0x8000 != aop | 0x8000 != bop) {
 *   Mresult[31:0] = aop * bop;
 *   res[15:0] = Mresult[30:15];
 * } else {
 *   res[15:0] = 0x7FFF;
 *   OV = 1;
 * }
 * Rd = SE32(res[15:0]); // RV32
 * Rd = SE64(res[15:0]); // RV64
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KHMBT(unsigned int a, unsigned int b)
{
    long rd;

    /* One `khmbt` instruction: %0 -> rd (output), %1 -> a (Rs1), %2 -> b (Rs2). */
    __ASM volatile("khmbt %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.37.2. KHMBT ===== */

/* ===== Inline Function Start for 3.37.3. KHMTT ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
 * \brief KHMTT (Signed Saturating Half Multiply T16 x T16)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * KHMxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed Q15 number contents of two 16-bit data in the corresponding portion
 * of the lower 32-bit chunk in registers and then right-shift 15 bits to turn the Q30 result into a Q15
 * number again and saturate the Q15 result into the destination register. If saturation happens, an
 * overflow flag OV will be set.
 *
 * **Description**:\n
 * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
 * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then right-
 * shifted 15-bits and saturated into a Q15 value. The Q15 value is then sign-extended and written into
 * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated
 * to 0x7FFF and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * aop = Rs1.H[0]; bop = Rs2.H[0]; // KHMBB
 * aop = Rs1.H[0]; bop = Rs2.H[1]; // KHMBT
 * aop = Rs1.H[1]; bop = Rs2.H[1]; // KHMTT
 * If (0x8000 != aop | 0x8000 != bop) {
 *   Mresult[31:0] = aop * bop;
 *   res[15:0] = Mresult[30:15];
 * } else {
 *   res[15:0] = 0x7FFF;
 *   OV = 1;
 * }
 * Rd = SE32(res[15:0]); // RV32
 * Rd = SE64(res[15:0]); // RV64
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KHMTT(unsigned int a, unsigned int b)
{
    long rd;

    /* One `khmtt` instruction: %0 -> rd (output), %1 -> a (Rs1), %2 -> b (Rs2). */
    __ASM volatile("khmtt %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.37.3. KHMTT ===== */

/* ===== Inline Function Start for 3.38.1. KMABB ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
 * \brief KMABB (SIMD Saturating Signed Multiply Bottom Halfs & Add)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMABB Rd, Rs1, Rs2
 * KMABT Rd, Rs1, Rs2
 * KMATT Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 16-bit content of 32-bit elements in a register with the 16-bit content
 * of 32-bit elements in another register and add the result to the content of 32-bit elements in the
 * third register. The addition result may be saturated and is written to the third register.
 * * KMABB: rd.W[x] + bottom*bottom (per 32-bit element)
 * * KMABT: rd.W[x] + bottom*top (per 32-bit element)
 * * KMATT: rd.W[x] + top*top (per 32-bit element)
 *
 * **Description**:\n
 * For the `KMABB` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
 * the bottom 16-bit content of 32-bit elements in Rs2.
 * For the `KMABT` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
 * the top 16-bit content of 32-bit elements in Rs2.
 * For the `KMATT` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
 * top 16-bit content of 32-bit elements in Rs2.
 * The multiplication result is added to the content of 32-bit elements in Rd. If the addition result is
 * beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to
 * 1. The results after saturation are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as
 * signed integers.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]); // KMABB
 * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[1]); // KMABT
 * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]); // KMATT
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMABB(long t, unsigned long a, unsigned long b)
{
    long acc = t;

    /* One `kmabb` instruction: %0 -> acc (read and written, Rd), %1 -> a (Rs1), %2 -> b (Rs2). */
    __ASM volatile("kmabb %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.38.1. KMABB ===== */

/* ===== Inline Function Start for 3.38.2. KMABT ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
 * \brief KMABT (SIMD Saturating Signed Multiply Bottom & Top Halfs & Add)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMABB Rd, Rs1, Rs2
 * KMABT Rd, Rs1, Rs2
 * KMATT Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 16-bit content of 32-bit elements in a register with the 16-bit content
 * of 32-bit elements in another register and add the result to the content of 32-bit elements in the
 * third register. The addition result may be saturated and is written to the third register.
 * * KMABB: rd.W[x] + bottom*bottom (per 32-bit element)
 * * KMABT: rd.W[x] + bottom*top (per 32-bit element)
 * * KMATT: rd.W[x] + top*top (per 32-bit element)
 *
 * **Description**:\n
 * For the `KMABB` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
 * the bottom 16-bit content of 32-bit elements in Rs2.
 * For the `KMABT` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
 * the top 16-bit content of 32-bit elements in Rs2.
 * For the `KMATT` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
 * top 16-bit content of 32-bit elements in Rs2.
 * The multiplication result is added to the content of 32-bit elements in Rd. If the addition result is
 * beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to
 * 1. The results after saturation are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as
 * signed integers.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]); // KMABB
 * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[1]); // KMABT
 * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]); // KMATT
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMABT(long t, unsigned long a, unsigned long b)
{
    long acc = t;

    /* One `kmabt` instruction: %0 -> acc (read and written, Rd), %1 -> a (Rs1), %2 -> b (Rs2). */
    __ASM volatile("kmabt %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.38.2. KMABT ===== */

/* ===== Inline Function Start for 3.38.3. KMATT ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
 * \brief KMATT (SIMD Saturating Signed Multiply Top Halfs & Add)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMABB Rd, Rs1, Rs2
 * KMABT Rd, Rs1, Rs2
 * KMATT Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 16-bit content of 32-bit elements in a register with the 16-bit content
 * of 32-bit elements in another register and add the result to the content of 32-bit elements in the
 * third register. The addition result may be saturated and is written to the third register.
 * * KMABB: rd.W[x] + bottom*bottom (per 32-bit element)
 * * KMABT: rd.W[x] + bottom*top (per 32-bit element)
 * * KMATT: rd.W[x] + top*top (per 32-bit element)
 *
 * **Description**:\n
 * For the `KMABB` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
 * the bottom 16-bit content of 32-bit elements in Rs2.
 * For the `KMABT` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
 * the top 16-bit content of 32-bit elements in Rs2.
 * For the `KMATT` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
 * top 16-bit content of 32-bit elements in Rs2.
 * The multiplication result is added to the content of 32-bit elements in Rd. If the addition result is
 * beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to
 * 1. The results after saturation are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as
 * signed integers.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]); // KMABB
 * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[1]); // KMABT
 * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]); // KMATT
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMATT(long t, unsigned long a, unsigned long b)
{
    long acc = t;

    /* One `kmatt` instruction: %0 -> acc (read and written, Rd), %1 -> a (Rs1), %2 -> b (Rs2). */
    __ASM volatile("kmatt %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.38.3. KMATT ===== */

/* ===== Inline Function Start for 3.39.1. KMADA ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
 * \brief KMADA (SIMD Saturating Signed Multiply Two Halfs and Two Adds)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMADA Rd, Rs1, Rs2
 * KMAXDA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then adds
 * the two 32-bit results and 32-bit elements in a third register together. The addition result may be
 * saturated.
 * * KMADA: rd.W[x] + top*top + bottom*bottom (per 32-bit element)
 * * KMAXDA: rd.W[x] + top*bottom + bottom*top (per 32-bit element)
 *
 * **Description**:\n
 * For the `KMADA` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
 * the bottom 16-bit content of 32-bit elements in Rs2 and then adds the result to the result of
 * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit
 * elements in Rs2.
 * For the `KMAXDA` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
 * bottom 16-bit content of 32-bit elements in Rs2 and then adds the result to the result of multiplying
 * the bottom 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit elements in
 * Rs2.
 * The result is added to the content of 32-bit elements in Rd. If the addition result is beyond the Q31
 * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The 32-bit
 * results after saturation are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed
 * integers.
 *
 * **Operations**:\n
 * ~~~
 * // KMADA
 * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]) + (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
 * // KMAXDA
 * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[0]) + (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMADA(long t, unsigned long a, unsigned long b)
{
    long acc = t;

    /* One `kmada` instruction: %0 -> acc (read and written, Rd), %1 -> a (Rs1), %2 -> b (Rs2). */
    __ASM volatile("kmada %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.39.1. KMADA ===== */

/* ===== Inline Function Start for 3.39.2. KMAXDA ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
 * \brief KMAXDA (SIMD Saturating Signed Crossed Multiply Two Halfs and Two Adds)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMADA Rd, Rs1, Rs2
 * KMAXDA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then adds
 * the two 32-bit results and 32-bit elements in a third register together. The addition result may be
 * saturated.
 * * KMADA: rd.W[x] + top*top + bottom*bottom (per 32-bit element)
 * * KMAXDA: rd.W[x] + top*bottom + bottom*top (per 32-bit element)
 *
 * **Description**:\n
 * For the `KMADA` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
 * the bottom 16-bit content of 32-bit elements in Rs2 and then adds the result to the result of
 * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit
 * elements in Rs2.
 * For the `KMAXDA` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
 * bottom 16-bit content of 32-bit elements in Rs2 and then adds the result to the result of multiplying
 * the bottom 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit elements in
 * Rs2.
 * The result is added to the content of 32-bit elements in Rd. If the addition result is beyond the Q31
 * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The 32-bit
 * results after saturation are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed
 * integers.
 *
 * **Operations**:\n
 * ~~~
 * // KMADA
 * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]) + (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
 * // KMAXDA
 * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[0]) + (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMAXDA(long t, unsigned long a, unsigned long b)
{
    long acc = t;

    /* One `kmaxda` instruction: %0 -> acc (read and written, Rd), %1 -> a (Rs1), %2 -> b (Rs2). */
    __ASM volatile("kmaxda %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.39.2. KMAXDA ===== */

/* ===== Inline Function Start for 3.40.1. KMADS ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
 * \brief KMADS (SIMD Saturating Signed Multiply Two Halfs & Subtract & Add)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMADS Rd, Rs1, Rs2
 * KMADRS Rd, Rs1, Rs2
 * KMAXDS Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then
 * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to
 * the corresponding 32-bit elements in a third register. The addition result may be saturated.
 * * KMADS: rd.W[x] + (top*top - bottom*bottom) (per 32-bit element)
 * * KMADRS: rd.W[x] + (bottom*bottom - top*top) (per 32-bit element)
 * * KMAXDS: rd.W[x] + (top*bottom - bottom*top) (per 32-bit element)
 *
 * **Description**:\n
 * For the `KMADS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
 * the bottom 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
 * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit
 * elements in Rs2.
 * For the `KMADRS` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
 * top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
 * multiplying the bottom 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-
 * bit elements in Rs2.
 * For the `KMAXDS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
 * the top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
 * multiplying the top 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-bit
 * elements in Rs2.
 * The subtraction result is then added to the content of the corresponding 32-bit elements in Rd. If the
 * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and
 * the OV bit is set to 1. The 32-bit results after saturation are written to Rd. The 16-bit contents of Rs1
 * and Rs2 are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * // KMADS
 * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
 * // KMADRS
 * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]);
 * // KMAXDS
 * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMADS(long t, unsigned long a, unsigned long b)
{
    long acc = t;

    /* One `kmads` instruction: %0 -> acc (read and written, Rd), %1 -> a (Rs1), %2 -> b (Rs2). */
    __ASM volatile("kmads %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.40.1. KMADS ===== */

/* ===== Inline Function Start for 3.40.2. KMADRS ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
 * \brief KMADRS (SIMD Saturating Signed Multiply Two Halfs & Reverse Subtract & Add)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMADS Rd, Rs1, Rs2
 * KMADRS Rd, Rs1, Rs2
 * KMAXDS Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then
 * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to
 * the corresponding 32-bit elements in a third register. The addition result may be saturated.
 * * KMADS: rd.W[x] + (top*top - bottom*bottom) (per 32-bit element)
 * * KMADRS: rd.W[x] + (bottom*bottom - top*top) (per 32-bit element)
 * * KMAXDS: rd.W[x] + (top*bottom - bottom*top) (per 32-bit element)
 *
 * **Description**:\n
 * For the `KMADS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
 * the bottom 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
 * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit
 * elements in Rs2.
 * For the `KMADRS` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
 * top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
 * multiplying the bottom 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-
 * bit elements in Rs2.
 * For the `KMAXDS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
 * the top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
 * multiplying the top 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-bit
 * elements in Rs2.
 * The subtraction result is then added to the content of the corresponding 32-bit elements in Rd. If the
 * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and
 * the OV bit is set to 1. The 32-bit results after saturation are written to Rd. The 16-bit contents of Rs1
 * and Rs2 are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * // KMADS
 * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
 * // KMADRS
 * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]);
 * // KMAXDS
 * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMADRS(long t, unsigned long a, unsigned long b)
{
    long acc = t;

    /* One `kmadrs` instruction: %0 -> acc (read and written, Rd), %1 -> a (Rs1), %2 -> b (Rs2). */
    __ASM volatile("kmadrs %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.40.2. KMADRS ===== */

/* ===== Inline Function Start for 3.40.3. KMAXDS ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
 * \brief KMAXDS (SIMD Saturating Signed Crossed Multiply Two Halfs & Subtract & Add)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMADS Rd, Rs1, Rs2
 * KMADRS Rd, Rs1, Rs2
 * KMAXDS Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then
 * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to
 * the corresponding 32-bit elements in a third register. The addition result may be saturated.
 * * KMADS: rd.W[x] + (top*top - bottom*bottom) (per 32-bit element)
 * * KMADRS: rd.W[x] + (bottom*bottom - top*top) (per 32-bit element)
 * * KMAXDS: rd.W[x] + (top*bottom - bottom*top) (per 32-bit element)
 *
 * **Description**:\n
 * For the `KMADS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
 * the bottom 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
 * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit
 * elements in Rs2.
 * For the `KMADRS` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
 * top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
 * multiplying the bottom 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-
 * bit elements in Rs2.
 * For the `KMAXDS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
 * the top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
 * multiplying the top 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-bit
 * elements in Rs2.
 * The subtraction result is then added to the content of the corresponding 32-bit elements in Rd. If the
 * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and
 * the OV bit is set to 1. The 32-bit results after saturation are written to Rd. The 16-bit contents of Rs1
 * and Rs2 are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * // KMADS
 * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
 * // KMADRS
 * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]);
 * // KMAXDS
 * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMAXDS(long t, unsigned long a, unsigned long b)
{
    /* Accumulator operand: supplies the incoming Rd value and receives the result. */
    long acc = t;

    __ASM volatile("kmaxds %0, %1, %2"
                   : "+r"(acc)          /* %0: read-write */
                   : "r"(a), "r"(b));   /* %1, %2: inputs */

    return acc;
}
/* ===== Inline Function End for 3.40.3. KMAXDS ===== */

/* ===== Inline Function Start for 3.41. KMAR64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
 * \brief KMAR64 (Signed Multiply and Saturating Add to 64-Bit Data)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * KMAR64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the 32-bit signed elements in two registers and add the 64-bit multiplication
 * results to the 64-bit signed data of a pair of registers (RV32) or a register (RV64). The result is
 * saturated to the Q63 range and written back to the pair of registers (RV32) or the register (RV64).
 *
 * **RV32 Description**:\n
 * This instruction multiplies the 32-bit signed data of Rs1 with that of Rs2. It adds
 * the 64-bit multiplication result to the 64-bit signed data of an even/odd pair of registers specified by
 * Rd(4,1) with unlimited precision. If the 64-bit addition result is beyond the Q63 number range (-2^63 <=
 * Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The saturated result is written back
 * to the even/odd pair of registers specified by Rd(4,1).
 * Rx(4,1), i.e., value d, determines the even/odd pair group of two registers. Specifically, the register
 * pair includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
 * of the pair contains the low 32-bit of the result.
 *
 * **RV64 Description**:\n
 * This instruction multiplies the 32-bit signed elements of Rs1 with that of Rs2. It
 * adds the 64-bit multiplication results to the 64-bit signed data of Rd with unlimited precision. If the
 * 64-bit addition result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range
 * and the OV bit is set to 1. The saturated result is written back to Rd.
 *
 * **Operations**:\n
 * ~~~
 * RV32:
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * result = R[t_H].R[t_L] + (Rs1 * Rs2);
 * if (result > (2^63)-1) {
 *   result = (2^63)-1; OV = 1;
 * } else if (result < -2^63) {
 *   result = -2^63; OV = 1;
 * }
 * R[t_H].R[t_L] = result;
 * RV64:
 * // `result` has unlimited precision
 * result = Rd + (Rs1.W[0] * Rs2.W[0]) + (Rs1.W[1] * Rs2.W[1]);
 * if (result > (2^63)-1) {
 *   result = (2^63)-1; OV = 1;
 * } else if (result < -2^63) {
 *   result = -2^63; OV = 1;
 * }
 * Rd = result;
 * ~~~
 *
 * \param [in]  t    long long type of value stored in t
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_KMAR64(long long t, long a, long b)
{
    /* 64-bit accumulator operand: read as the addend and overwritten with the result. */
    long long acc64 = t;

    __ASM volatile("kmar64 %0, %1, %2"
                   : "+r"(acc64)        /* %0: read-write */
                   : "r"(a), "r"(b));   /* %1, %2: inputs */

    return acc64;
}
/* ===== Inline Function End for 3.41. KMAR64 ===== */

/* ===== Inline Function Start for 3.42.1. KMDA ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
 * \brief KMDA (SIMD Signed Multiply Two Halfs and Add)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMDA Rd, Rs1, Rs2
 * KMXDA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
 * adds the two 32-bit results together. The addition result may be saturated.
 * * KMDA: top*top + bottom*bottom (per 32-bit element)
 * * KMXDA: top*bottom + bottom*top (per 32-bit element)
 *
 * **Description**:\n
 * For the `KMDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
 * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-
 * bit elements of Rs2.
 * For the `KMXDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the top 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
 * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of the
 * 32-bit elements of Rs2.
 * The addition result is checked for saturation. If saturation happens, the result is saturated to 2^31-1.
 * The final results are written to Rd. The 16-bit contents are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * if ((Rs1.W[x] != 0x80008000) or (Rs2.W[x] != 0x80008000)) {
 *   // KMDA
 *   Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[1]) + (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
 *   // KMXDA
 *   Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) + (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
 * } else {
 *   Rd.W[x] = 0x7fffffff;
 *   OV = 1;
 * }
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMDA(unsigned long a, unsigned long b)
{
    long rd; /* write-only output operand; assigned by the instruction */

    __ASM volatile("kmda %0, %1, %2"
                   : "=r"(rd)           /* %0: output */
                   : "r"(a), "r"(b));   /* %1, %2: inputs */

    return rd;
}
/* ===== Inline Function End for 3.42.1. KMDA ===== */

/* ===== Inline Function Start for 3.42.2. KMXDA ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
 * \brief KMXDA (SIMD Signed Crossed Multiply Two Halfs and Add)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMDA Rd, Rs1, Rs2
 * KMXDA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
 * adds the two 32-bit results together. The addition result may be saturated.
 * * KMDA: top*top + bottom*bottom (per 32-bit element)
 * * KMXDA: top*bottom + bottom*top (per 32-bit element)
 *
 * **Description**:\n
 * For the `KMDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
 * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-
 * bit elements of Rs2.
 * For the `KMXDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the top 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
 * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of the
 * 32-bit elements of Rs2.
 * The addition result is checked for saturation. If saturation happens, the result is saturated to 2^31-1.
 * The final results are written to Rd. The 16-bit contents are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * if ((Rs1.W[x] != 0x80008000) or (Rs2.W[x] != 0x80008000)) {
 *   // KMDA
 *   Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[1]) + (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
 *   // KMXDA
 *   Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) + (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
 * } else {
 *   Rd.W[x] = 0x7fffffff;
 *   OV = 1;
 * }
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMXDA(unsigned long a, unsigned long b)
{
    long rd; /* write-only output operand; assigned by the instruction */

    __ASM volatile("kmxda %0, %1, %2"
                   : "=r"(rd)           /* %0: output */
                   : "r"(a), "r"(b));   /* %1, %2: inputs */

    return rd;
}
/* ===== Inline Function End for 3.42.2. KMXDA ===== */

/* ===== Inline Function Start for 3.43.1. KMMAC ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
 * \brief KMMAC (SIMD Saturating MSW Signed Multiply Word and Add)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMMAC Rd, Rs1, Rs2
 * KMMAC.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit integer elements of two registers and add the most significant
 * 32-bit results with the signed 32-bit integer elements of a third register. The addition results are
 * saturated first and then written back to the third register. The `.u` form performs an additional
 * rounding up operation on the multiplication results before adding the most significant 32-bit part
 * of the results.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2
 * and adds the most significant 32-bit multiplication results with the signed 32-bit elements of Rd. If
 * the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range
 * and the OV bit is set to 1. The results after saturation are written to Rd. The `.u` form of the
 * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by
 * adding a 1 to bit 31 of the results.
 *
 * **Operations**:\n
 * ~~~
 * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
 * if (`.u` form) {
 *   Round[x][32:0] = Mres[x][63:31] + 1;
 *   res[x] = Rd.W[x] + Round[x][32:1];
 * } else {
 *   res[x] = Rd.W[x] + Mres[x][63:32];
 * }
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMMAC(long t, long a, long b)
{
    /* Accumulator operand: supplies the incoming Rd value and receives the result. */
    long acc = t;

    __ASM volatile("kmmac %0, %1, %2"
                   : "+r"(acc)          /* %0: read-write */
                   : "r"(a), "r"(b));   /* %1, %2: inputs */

    return acc;
}
/* ===== Inline Function End for 3.43.1. KMMAC ===== */

/* ===== Inline Function Start for 3.43.2. KMMAC.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
 * \brief KMMAC.u (SIMD Saturating MSW Signed Multiply Word and Add with Rounding)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMMAC Rd, Rs1, Rs2
 * KMMAC.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit integer elements of two registers and add the most significant
 * 32-bit results with the signed 32-bit integer elements of a third register. The addition results are
 * saturated first and then written back to the third register. The `.u` form performs an additional
 * rounding up operation on the multiplication results before adding the most significant 32-bit part
 * of the results.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2
 * and adds the most significant 32-bit multiplication results with the signed 32-bit elements of Rd. If
 * the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range
 * and the OV bit is set to 1. The results after saturation are written to Rd. The `.u` form of the
 * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by
 * adding a 1 to bit 31 of the results.
 *
 * **Operations**:\n
 * ~~~
 * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
 * if (`.u` form) {
 *   Round[x][32:0] = Mres[x][63:31] + 1;
 *   res[x] = Rd.W[x] + Round[x][32:1];
 * } else {
 *   res[x] = Rd.W[x] + Mres[x][63:32];
 * }
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMMAC_U(long t, long a, long b)
{
    /* Accumulator operand: supplies the incoming Rd value and receives the result. */
    long acc = t;

    __ASM volatile("kmmac.u %0, %1, %2"
                   : "+r"(acc)          /* %0: read-write */
                   : "r"(a), "r"(b));   /* %1, %2: inputs */

    return acc;
}
/* ===== Inline Function End for 3.43.2. KMMAC.u ===== */

/* ===== Inline Function Start for 3.44.1. KMMAWB ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
 * \brief KMMAWB (SIMD Saturating MSW Signed Multiply Word and Bottom Half and Add)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMMAWB Rd, Rs1, Rs2
 * KMMAWB.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the
 * corresponding 32-bit elements of another register and add the most significant 32-bit results with
 * the corresponding signed 32-bit elements of a third register. The addition result is written to the
 * corresponding 32-bit elements of the third register. The `.u` form rounds up the multiplication
 * results from the most significant discarded bit before the addition operations.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit elements of Rs1 with the signed bottom 16-bit content
 * of the corresponding 32-bit elements of Rs2 and adds the most significant 32-bit multiplication
 * results with the corresponding signed 32-bit elements of Rd. If the addition result is beyond the Q31
 * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The results
 * after saturation are written to the corresponding 32-bit elements of Rd. The `.u` form of the
 * instruction rounds up the most significant 32-bit of the 48-bit multiplication results by adding a 1 to
 * bit 15 of the result before the addition operations.
 *
 * **Operations**:\n
 * ~~~
 * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[0];
 * if (`.u` form) {
 *   Round[x][32:0] = Mres[x][47:15] + 1;
 *   res[x] = Rd.W[x] + Round[x][32:1];
 * } else {
 *   res[x] = Rd.W[x] + Mres[x][47:16];
 * }
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMMAWB(long t, unsigned long a, unsigned long b)
{
    /* Accumulator operand: supplies the incoming Rd value and receives the result. */
    long acc = t;

    __ASM volatile("kmmawb %0, %1, %2"
                   : "+r"(acc)          /* %0: read-write */
                   : "r"(a), "r"(b));   /* %1, %2: inputs */

    return acc;
}
/* ===== Inline Function End for 3.44.1. KMMAWB ===== */

/* ===== Inline Function Start for 3.44.2. KMMAWB.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
 * \brief KMMAWB.u (SIMD Saturating MSW Signed Multiply Word and Bottom Half and Add with Rounding)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMMAWB Rd, Rs1, Rs2
 * KMMAWB.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the
 * corresponding 32-bit elements of another register and add the most significant 32-bit results with
 * the corresponding signed 32-bit elements of a third register. The addition result is written to the
 * corresponding 32-bit elements of the third register. The `.u` form rounds up the multiplication
 * results from the most significant discarded bit before the addition operations.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit elements of Rs1 with the signed bottom 16-bit content
 * of the corresponding 32-bit elements of Rs2 and adds the most significant 32-bit multiplication
 * results with the corresponding signed 32-bit elements of Rd. If the addition result is beyond the Q31
 * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The results
 * after saturation are written to the corresponding 32-bit elements of Rd. The `.u` form of the
 * instruction rounds up the most significant 32-bit of the 48-bit multiplication results by adding a 1 to
 * bit 15 of the result before the addition operations.
 *
 * **Operations**:\n
 * ~~~
 * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[0];
 * if (`.u` form) {
 *   Round[x][32:0] = Mres[x][47:15] + 1;
 *   res[x] = Rd.W[x] + Round[x][32:1];
 * } else {
 *   res[x] = Rd.W[x] + Mres[x][47:16];
 * }
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMMAWB_U(long t, unsigned long a, unsigned long b)
{
    /* Accumulator operand: supplies the incoming Rd value and receives the result. */
    long acc = t;

    __ASM volatile("kmmawb.u %0, %1, %2"
                   : "+r"(acc)          /* %0: read-write */
                   : "r"(a), "r"(b));   /* %1, %2: inputs */

    return acc;
}
/* ===== Inline Function End for 3.44.2. KMMAWB.u ===== */

/* ===== Inline Function Start for 3.45.1. KMMAWB2 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
 * \brief KMMAWB2 (SIMD Saturating MSW Signed Multiply Word and Bottom Half & 2 and Add)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMMAWB2 Rd, Rs1, Rs2
 * KMMAWB2.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit elements of one register and the bottom 16-bit of the
 * corresponding 32-bit elements of another register, double the multiplication results and add the
 * saturated most significant 32-bit results with the corresponding signed 32-bit elements of a third
 * register. The saturated addition result is written to the corresponding 32-bit elements of the third
 * register. The `.u` form rounds up the multiplication results from the most significant discarded bit
 * before the addition operations.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed bottom 16-bit Q15
 * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
 * adds the saturated most significant 32-bit Q31 multiplication results with the corresponding signed
 * 32-bit elements of Rd. If the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is
 * saturated to the range and the OV bit is set to 1. The results after saturation are written to the
 * corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the most significant
 * 32-bit of the 48-bit Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of
 * the result before the addition operations.
 *
 * **Operations**:\n
 * ~~~
 * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[0] == 0x8000)) {
 *   addop.W[x] = 0x7fffffff;
 *   OV = 1;
 * } else {
 *   Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[0];
 *   if (`.u` form) {
 *     Mres[x][47:14] = Mres[x][47:14] + 1;
 *   }
 *   addop.W[x] = Mres[x][46:15]; // doubling
 * }
 * res[x] = Rd.W[x] + addop.W[x];
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMMAWB2(long t, unsigned long a, unsigned long b)
{
    /* Accumulator operand: supplies the incoming Rd value and receives the result. */
    long acc = t;

    __ASM volatile("kmmawb2 %0, %1, %2"
                   : "+r"(acc)          /* %0: read-write */
                   : "r"(a), "r"(b));   /* %1, %2: inputs */

    return acc;
}
/* ===== Inline Function End for 3.45.1. KMMAWB2 ===== */

/* ===== Inline Function Start for 3.45.2. KMMAWB2.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
 * \brief KMMAWB2.u (SIMD Saturating MSW Signed Multiply Word and Bottom Half & 2 and Add with Rounding)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMMAWB2 Rd, Rs1, Rs2
 * KMMAWB2.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit elements of one register and the bottom 16-bit of the
 * corresponding 32-bit elements of another register, double the multiplication results and add the
 * saturated most significant 32-bit results with the corresponding signed 32-bit elements of a third
 * register. The saturated addition result is written to the corresponding 32-bit elements of the third
 * register. The `.u` form rounds up the multiplication results from the most significant discarded bit
 * before the addition operations.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed bottom 16-bit Q15
 * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
 * adds the saturated most significant 32-bit Q31 multiplication results with the corresponding signed
 * 32-bit elements of Rd. If the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is
 * saturated to the range and the OV bit is set to 1. The results after saturation are written to the
 * corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the most significant
 * 32-bit of the 48-bit Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of
 * the result before the addition operations.
 *
 * **Operations**:\n
 * ~~~
 * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[0] == 0x8000)) {
 *   addop.W[x] = 0x7fffffff;
 *   OV = 1;
 * } else {
 *   Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[0];
 *   if (`.u` form) {
 *     Mres[x][47:14] = Mres[x][47:14] + 1;
 *   }
 *   addop.W[x] = Mres[x][46:15]; // doubling
 * }
 * res[x] = Rd.W[x] + addop.W[x];
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMMAWB2_U(long t, unsigned long a, unsigned long b)
{
    /* Accumulator operand: supplies the incoming Rd value and receives the result. */
    long acc = t;

    __ASM volatile("kmmawb2.u %0, %1, %2"
                   : "+r"(acc)          /* %0: read-write */
                   : "r"(a), "r"(b));   /* %1, %2: inputs */

    return acc;
}
/* ===== Inline Function End for 3.45.2. KMMAWB2.u ===== */

/* ===== Inline Function Start for 3.46.1. KMMAWT ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
 * \brief KMMAWT (SIMD Saturating MSW Signed Multiply Word and Top Half and Add)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMMAWT Rd, Rs1, Rs2
 * KMMAWT.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit integer elements of one register and the signed top 16-bit of the
 * corresponding 32-bit elements of another register and add the most significant 32-bit results with
 * the corresponding signed 32-bit elements of a third register. The addition results are written to the
 * corresponding 32-bit elements of the third register. The `.u` form rounds up the multiplication
 * results from the most significant discarded bit before the addition operations.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit elements of Rs1 with the signed top 16-bit of the
 * corresponding 32-bit elements of Rs2 and adds the most significant 32-bit multiplication results
 * with the corresponding signed 32-bit elements of Rd. If the addition result is beyond the Q31
 * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The results
 * after saturation are written to the corresponding 32-bit elements of Rd. The `.u` form of the
 * instruction rounds up the most significant 32-bit of the 48-bit multiplication results by adding a 1 to
 * bit 15 of the result before the addition operations.
 *
 * **Operations**:\n
 * ~~~
 * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[1];
 * if (`.u` form) {
 *   Round[x][32:0] = Mres[x][47:15] + 1;
 *   res[x] = Rd.W[x] + Round[x][32:1];
 * } else {
 *   res[x] = Rd.W[x] + Mres[x][47:16];
 * }
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMMAWT(long t, unsigned long a, unsigned long b)
{
    /* Accumulator operand: supplies the incoming Rd value and receives the result. */
    long acc = t;

    __ASM volatile("kmmawt %0, %1, %2"
                   : "+r"(acc)          /* %0: read-write */
                   : "r"(a), "r"(b));   /* %1, %2: inputs */

    return acc;
}
/* ===== Inline Function End for 3.46.1. KMMAWT ===== */

/* ===== Inline Function Start for 3.46.2. KMMAWT.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
 * \brief KMMAWT.u (SIMD Saturating MSW Signed Multiply Word and Top Half and Add with Rounding)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMMAWT Rd, Rs1, Rs2
 * KMMAWT.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit integer elements of one register and the signed top 16-bit of the
 * corresponding 32-bit elements of another register and add the most significant 32-bit results with
 * the corresponding signed 32-bit elements of a third register. The addition results are written to the
 * corresponding 32-bit elements of the third register. The `.u` form rounds up the multiplication
 * results from the most significant discarded bit before the addition operations.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit elements of Rs1 with the signed top 16-bit of the
 * corresponding 32-bit elements of Rs2 and adds the most significant 32-bit multiplication results
 * with the corresponding signed 32-bit elements of Rd. If the addition result is beyond the Q31
 * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The results
 * after saturation are written to the corresponding 32-bit elements of Rd. The `.u` form of the
 * instruction rounds up the most significant 32-bit of the 48-bit multiplication results by adding a 1 to
 * bit 15 of the result before the addition operations.
 *
 * **Operations**:\n
 * ~~~
 * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[1];
 * if (`.u` form) {
 *   Round[x][32:0] = Mres[x][47:15] + 1;
 *   res[x] = Rd.W[x] + Round[x][32:1];
 * } else {
 *   res[x] = Rd.W[x] + Mres[x][47:16];
 * }
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMMAWT_U(long t, unsigned long a, unsigned long b)
{
    /* Accumulator operand: supplies the incoming Rd value and receives the result. */
    long acc = t;

    __ASM volatile("kmmawt.u %0, %1, %2"
                   : "+r"(acc)          /* %0: read-write */
                   : "r"(a), "r"(b));   /* %1, %2: inputs */

    return acc;
}
/* ===== Inline Function End for 3.46.2. KMMAWT.u ===== */

/* ===== Inline Function Start for 3.47.1. KMMAWT2 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
 * \brief KMMAWT2 (SIMD Saturating MSW Signed Multiply Word and Top Half & 2 and Add)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMMAWT2 Rd, Rs1, Rs2
 * KMMAWT2.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit elements of one register and the top 16-bit of the
 * corresponding 32-bit elements of another register, double the multiplication results and add the
 * saturated most significant 32-bit results with the corresponding signed 32-bit elements of a third
 * register. The saturated addition result is written to the corresponding 32-bit elements of the third
 * register. The `.u` form rounds up the multiplication results from the most significant discarded bit
 * before the addition operations.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed top 16-bit Q15
 * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
 * adds the saturated most significant 32-bit Q31 multiplication results with the corresponding signed
 * 32-bit elements of Rd. If the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is
 * saturated to the range and the OV bit is set to 1. The results after saturation are written to the
 * corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the most significant
 * 32-bit of the 48-bit Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of
 * the result before the addition operations.
 *
 * **Operations**:\n
 * ~~~
 * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[1] == 0x8000)) {
 *   addop.W[x] = 0x7fffffff;
 *   OV = 1;
 * } else {
 *   Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[1];
 *   if (`.u` form) {
 *     Mres[x][47:14] = Mres[x][47:14] + 1;
 *   }
 *   addop.W[x] = Mres[x][46:15]; // doubling
 * }
 * res[x] = Rd.W[x] + addop.W[x];
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMMAWT2(long t, unsigned long a, unsigned long b)
{
    /* Accumulator operand: supplies the incoming Rd value and receives the result. */
    long acc = t;

    __ASM volatile("kmmawt2 %0, %1, %2"
                   : "+r"(acc)          /* %0: read-write */
                   : "r"(a), "r"(b));   /* %1, %2: inputs */

    return acc;
}
/* ===== Inline Function End for 3.47.1. KMMAWT2 ===== */

/* ===== Inline Function Start for 3.47.2. KMMAWT2.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
 * \brief KMMAWT2.u (SIMD Saturating MSW Signed Multiply Word and Top Half & 2 and Add with Rounding)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMMAWT2 Rd, Rs1, Rs2
 * KMMAWT2.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit elements of one register and the top 16-bit of the
 * corresponding 32-bit elements of another register, double the multiplication results and add the
 * saturated most significant 32-bit results with the corresponding signed 32-bit elements of a third
 * register. The saturated addition result is written to the corresponding 32-bit elements of the third
 * register. The `.u` form rounds up the multiplication results from the most significant discarded bit
 * before the addition operations.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed top 16-bit Q15
 * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
 * adds the saturated most significant 32-bit Q31 multiplication results with the corresponding signed
 * 32-bit elements of Rd. If the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is
 * saturated to the range and the OV bit is set to 1. The results after saturation are written to the
 * corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the most significant
 * 32-bit of the 48-bit Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of
 * the result before the addition operations.
 *
 * **Operations**:\n
 * ~~~
 * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[1] == 0x8000)) {
 *   addop.W[x] = 0x7fffffff;
 *   OV = 1;
 * } else {
 *   Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[1];
 *   if (`.u` form) {
 *     Mres[x][47:14] = Mres[x][47:14] + 1;
 *   }
 *   addop.W[x] = Mres[x][46:15]; // doubling
 * }
 * res[x] = Rd.W[x] + addop.W[x];
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMMAWT2_U(long t, unsigned long a, unsigned long b)
{
    /* Rounding (.u) variant. t is the in/out operand %0 ("+r"): it supplies
     * the accumulator and receives the result; a and b feed %1 and %2. */
    __ASM volatile(
        "kmmawt2.u %0, %1, %2"
        : "+r"(t)
        : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for 3.47.2. KMMAWT2.u ===== */

/* ===== Inline Function Start for 3.48.1. KMMSB ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
 * \brief KMMSB (SIMD Saturating MSW Signed Multiply Word and Subtract)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMMSB Rd, Rs1, Rs2
 * KMMSB.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit integer elements of two registers and subtract the most
 * significant 32-bit results from the signed 32-bit elements of a third register. The subtraction results
 * are written to the third register. The `.u` form performs an additional rounding up operation on
 * the multiplication results before subtracting the most significant 32-bit part of the results.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2
 * and subtracts the most significant 32-bit multiplication results from the signed 32-bit elements of
 * Rd. If the subtraction result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the
 * range and the OV bit is set to 1. The results after saturation are written to Rd. The `.u` form of the
 * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by
 * adding a 1 to bit 31 of the results.
 *
 * **Operations**:\n
 * ~~~
 * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
 * if (`.u` form) {
 *   Round[x][32:0] = Mres[x][63:31] + 1;
 *   res[x] = Rd.W[x] - Round[x][32:1];
 * } else {
 *   res[x] = Rd.W[x] - Mres[x][63:32];
 * }
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMMSB(long t, long a, long b)
{
    /* t is bound as a read-write register operand (%0): the instruction
     * consumes its current value and replaces it with the result.
     * a and b are plain register inputs (%1, %2). */
    __ASM volatile(
        "kmmsb %0, %1, %2"
        : "+r"(t)
        : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for 3.48.1. KMMSB ===== */

/* ===== Inline Function Start for 3.48.2. KMMSB.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
 * \brief KMMSB.u (SIMD Saturating MSW Signed Multiply Word and Subtraction with Rounding)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMMSB Rd, Rs1, Rs2
 * KMMSB.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit integer elements of two registers and subtract the most
 * significant 32-bit results from the signed 32-bit elements of a third register. The subtraction results
 * are written to the third register. The `.u` form performs an additional rounding up operation on
 * the multiplication results before subtracting the most significant 32-bit part of the results.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2
 * and subtracts the most significant 32-bit multiplication results from the signed 32-bit elements of
 * Rd. If the subtraction result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the
 * range and the OV bit is set to 1. The results after saturation are written to Rd. The `.u` form of the
 * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by
 * adding a 1 to bit 31 of the results.
 *
 * **Operations**:\n
 * ~~~
 * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
 * if (`.u` form) {
 *   Round[x][32:0] = Mres[x][63:31] + 1;
 *   res[x] = Rd.W[x] - Round[x][32:1];
 * } else {
 *   res[x] = Rd.W[x] - Mres[x][63:32];
 * }
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMMSB_U(long t, long a, long b)
{
    /* Rounding (.u) variant of KMMSB. %0 is t as an in/out operand,
     * %1 and %2 are the inputs a and b. */
    __ASM volatile(
        "kmmsb.u %0, %1, %2"
        : "+r"(t)
        : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for 3.48.2. KMMSB.u ===== */

/* ===== Inline Function Start for 3.49.1. KMMWB2 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
 * \brief KMMWB2 (SIMD Saturating MSW Signed Multiply Word and Bottom Half & 2)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMMWB2 Rd, Rs1, Rs2
 * KMMWB2.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the
 * corresponding 32-bit elements of another register, double the multiplication results and write the
 * saturated most significant 32-bit results to the corresponding 32-bit elements of a register. The `.u`
 * form rounds up the results from the most significant discarded bit.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed bottom 16-bit Q15
 * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
 * writes the saturated most significant 32-bit Q31 multiplication results to the corresponding 32-bit
 * elements of Rd. The `.u` form of the instruction rounds up the most significant 32-bit of the 48-bit
 * Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of the results.
 *
 * **Operations**:\n
 * ~~~
 * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[0] == 0x8000)) {
 *   Rd.W[x] = 0x7fffffff;
 *   OV = 1;
 * } else {
 *   Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[0];
 *   if (`.u` form) {
 *     Round[x][32:0] = Mres[x][46:14] + 1;
 *     Rd.W[x] = Round[x][32:1];
 *   } else {
 *     Rd.W[x] = Mres[x][46:15];
 *   }
 * }
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMMWB2(long a, unsigned long b)
{
    long rd; /* write-only destination, operand %0 */

    /* a and b are passed in registers as operands %1 and %2. */
    __ASM volatile(
        "kmmwb2 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.49.1. KMMWB2 ===== */

/* ===== Inline Function Start for 3.49.2. KMMWB2.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
 * \brief KMMWB2.u (SIMD Saturating MSW Signed Multiply Word and Bottom Half & 2 with Rounding)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMMWB2 Rd, Rs1, Rs2
 * KMMWB2.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the
 * corresponding 32-bit elements of another register, double the multiplication results and write the
 * saturated most significant 32-bit results to the corresponding 32-bit elements of a register. The `.u`
 * form rounds up the results from the most significant discarded bit.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed bottom 16-bit Q15
 * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
 * writes the saturated most significant 32-bit Q31 multiplication results to the corresponding 32-bit
 * elements of Rd. The `.u` form of the instruction rounds up the most significant 32-bit of the 48-bit
 * Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of the results.
 *
 * **Operations**:\n
 * ~~~
 * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[0] == 0x8000)) {
 *   Rd.W[x] = 0x7fffffff;
 *   OV = 1;
 * } else {
 *   Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[0];
 *   if (`.u` form) {
 *     Round[x][32:0] = Mres[x][46:14] + 1;
 *     Rd.W[x] = Round[x][32:1];
 *   } else {
 *     Rd.W[x] = Mres[x][46:15];
 *   }
 * }
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMMWB2_U(long a, unsigned long b)
{
    long rd; /* write-only destination, operand %0 */

    /* Rounding (.u) variant; a -> %1, b -> %2. */
    __ASM volatile(
        "kmmwb2.u %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.49.2. KMMWB2.u ===== */

/* ===== Inline Function Start for 3.50.1. KMMWT2 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
 * \brief KMMWT2 (SIMD Saturating MSW Signed Multiply Word and Top Half & 2)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMMWT2 Rd, Rs1, Rs2
 * KMMWT2.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit integer elements of one register and the top 16-bit of the
 * corresponding 32-bit elements of another register, double the multiplication results and write the
 * saturated most significant 32-bit results to the corresponding 32-bit elements of a register. The `.u`
 * form rounds up the results from the most significant discarded bit.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed top 16-bit Q15
 * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
 * writes the saturated most significant 32-bit Q31 multiplication results to the corresponding 32-bit
 * elements of Rd. The `.u` form of the instruction rounds up the most significant 32-bit of the 48-bit
 * Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of the results.
 *
 * **Operations**:\n
 * ~~~
 * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[1] == 0x8000)) {
 *   Rd.W[x] = 0x7fffffff;
 *   OV = 1;
 * } else {
 *   Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[1];
 *   if (`.u` form) {
 *     Round[x][32:0] = Mres[x][46:14] + 1;
 *     Rd.W[x] = Round[x][32:1];
 *   } else {
 *     Rd.W[x] = Mres[x][46:15];
 *   }
 * }
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMMWT2(long a, unsigned long b)
{
    long rd; /* receives the instruction's destination value (operand %0) */

    /* Inputs: a is operand %1, b is operand %2, both in registers. */
    __ASM volatile(
        "kmmwt2 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.50.1. KMMWT2 ===== */

/* ===== Inline Function Start for 3.50.2. KMMWT2.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
 * \brief KMMWT2.u (SIMD Saturating MSW Signed Multiply Word and Top Half & 2 with Rounding)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMMWT2 Rd, Rs1, Rs2
 * KMMWT2.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit integer elements of one register and the top 16-bit of the
 * corresponding 32-bit elements of another register, double the multiplication results and write the
 * saturated most significant 32-bit results to the corresponding 32-bit elements of a register. The `.u`
 * form rounds up the results from the most significant discarded bit.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed top 16-bit Q15
 * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
 * writes the saturated most significant 32-bit Q31 multiplication results to the corresponding 32-bit
 * elements of Rd. The `.u` form of the instruction rounds up the most significant 32-bit of the 48-bit
 * Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of the results.
 *
 * **Operations**:\n
 * ~~~
 * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[1] == 0x8000)) {
 *   Rd.W[x] = 0x7fffffff;
 *   OV = 1;
 * } else {
 *   Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[1];
 *   if (`.u` form) {
 *     Round[x][32:0] = Mres[x][46:14] + 1;
 *     Rd.W[x] = Round[x][32:1];
 *   } else {
 *     Rd.W[x] = Mres[x][46:15];
 *   }
 * }
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMMWT2_U(long a, unsigned long b)
{
    long rd; /* receives the instruction's destination value (operand %0) */

    /* Rounding (.u) variant; a is operand %1 and b is operand %2. */
    __ASM volatile(
        "kmmwt2.u %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.50.2. KMMWT2.u ===== */

/* ===== Inline Function Start for 3.51.1. KMSDA ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
 * \brief KMSDA (SIMD Saturating Signed Multiply Two Halfs & Add & Subtract)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMSDA Rd, Rs1, Rs2
 * KMSXDA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
 * subtracts the two 32-bit results from the corresponding 32-bit elements of a third register. The
 * subtraction result may be saturated.
 * * KMSDA: rd.W[x] - top*top - bottom*bottom (per 32-bit element)
 * * KMSXDA: rd.W[x] - top*bottom - bottom*top (per 32-bit element)
 *
 * **Description**:\n
 * For the `KMSDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of
 * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2.
 * For the `KMSXDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the top 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of the
 * 32-bit elements of Rs1 with the bottom 16-bit content of the 32-bit elements of Rs2.
 * The two 32-bit multiplication results are then subtracted from the content of the corresponding 32-
 * bit elements of Rd. If the subtraction result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is
 * saturated to the range and the OV bit is set to 1. The results after saturation are written to Rd. The
 * 16-bit contents are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * // KMSDA
 * res[x] = Rd.W[x] - (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
 * // KMSXDA
 * res[x] = Rd.W[x] - (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMSDA(long t, unsigned long a, unsigned long b)
{
    /* t doubles as input and output (operand %0, "+r"); the instruction
     * updates it in place. a and b are the register inputs %1 and %2. */
    __ASM volatile(
        "kmsda %0, %1, %2"
        : "+r"(t)
        : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for 3.51.1. KMSDA ===== */

/* ===== Inline Function Start for 3.51.2. KMSXDA ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
 * \brief KMSXDA (SIMD Saturating Signed Crossed Multiply Two Halfs & Add & Subtract)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KMSDA Rd, Rs1, Rs2
 * KMSXDA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
 * subtracts the two 32-bit results from the corresponding 32-bit elements of a third register. The
 * subtraction result may be saturated.
 * * KMSDA: rd.W[x] - top*top - bottom*bottom (per 32-bit element)
 * * KMSXDA: rd.W[x] - top*bottom - bottom*top (per 32-bit element)
 *
 * **Description**:\n
 * For the `KMSDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of
 * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2.
 * For the `KMSXDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the top 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of the
 * 32-bit elements of Rs1 with the bottom 16-bit content of the 32-bit elements of Rs2.
 * The two 32-bit multiplication results are then subtracted from the content of the corresponding 32-
 * bit elements of Rd. If the subtraction result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is
 * saturated to the range and the OV bit is set to 1. The results after saturation are written to Rd. The
 * 16-bit contents are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * // KMSDA
 * res[x] = Rd.W[x] - (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
 * // KMSXDA
 * res[x] = Rd.W[x] - (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMSXDA(long t, unsigned long a, unsigned long b)
{
    /* Crossed variant of KMSDA. t is the in/out operand %0 ("+r");
     * a and b are the register inputs %1 and %2. */
    __ASM volatile(
        "kmsxda %0, %1, %2"
        : "+r"(t)
        : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for 3.51.2. KMSXDA ===== */

/* ===== Inline Function Start for 3.52. KMSR64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
 * \brief KMSR64 (Signed Multiply and Saturating Subtract from 64-Bit Data)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * KMSR64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the 32-bit signed elements in two registers and subtract the 64-bit multiplication
 * results from the 64-bit signed data of a pair of registers (RV32) or a register (RV64). The result is
 * saturated to the Q63 range and written back to the pair of registers (RV32) or the register (RV64).
 *
 * **RV32 Description**:\n
 * This instruction multiplies the 32-bit signed data of Rs1 with that of Rs2. It
 * subtracts the 64-bit multiplication result from the 64-bit signed data of an even/odd pair of registers
 * specified by Rd(4,1) with unlimited precision. If the 64-bit subtraction result is beyond the Q63
 * number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The saturated
 * result is written back to the even/odd pair of registers specified by Rd(4,1).
 * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
 * of the pair contains the low 32-bit of the result.
 *
 * **RV64 Description**:\n
 * This instruction multiplies the 32-bit signed elements of Rs1 with that of Rs2. It
 * subtracts the 64-bit multiplication results from the 64-bit signed data in Rd with unlimited
 * precision. If the 64-bit subtraction result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is
 * saturated to the range and the OV bit is set to 1. The saturated result is written back to Rd.
 *
 * **Operations**:\n
 * ~~~
 * RV32:
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * result = R[t_H].R[t_L] - (Rs1 * Rs2);
 * if (result > (2^63)-1) {
 *   result = (2^63)-1; OV = 1;
 * } else if (result < -2^63) {
 *   result = -2^63; OV = 1;
 * }
 * R[t_H].R[t_L] = result;
 * RV64:
 * // `result` has unlimited precision
 * result = Rd - (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]);
 * if (result > (2^63)-1) {
 *   result = (2^63)-1; OV = 1;
 * } else if (result < -2^63) {
 *   result = -2^63; OV = 1;
 * }
 * Rd = result;
 * ~~~
 *
 * \param [in]  t    long long type of value stored in t
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_KMSR64(long long t, long a, long b)
{
    /* The long long t is bound as the read-write operand %0: it carries the
     * 64-bit value in and the updated value out. a and b are the register
     * inputs %1 and %2. */
    __ASM volatile(
        "kmsr64 %0, %1, %2"
        : "+r"(t)
        : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for 3.52. KMSR64 ===== */

/* ===== Inline Function Start for 3.53. KSLLW ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
 * \brief KSLLW (Saturating Shift Left Logical for Word)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * KSLLW Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do logical left shift operation with saturation on a 32-bit word. The shift amount is a
 * variable from a GPR.
 *
 * **Description**:\n
 * The first word data in Rs1 is left-shifted logically. The vacated low-order bits are filled with
 * zero and the shift amount is specified by the low-order 5-bits of the value in the Rs2 register. Any
 * shifted value greater than 2^31-1 is saturated to 2^31-1. Any shifted value smaller than -2^31 is saturated
 * to -2^31. And the saturated result is sign-extended and written to Rd. If any saturation is performed,
 * set OV bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * sa = Rs2[4:0];
 * res[(31+sa):0] = Rs1.W[0] << sa;
 * if (res > (2^31)-1) {
 *   res = 0x7fffffff; OV = 1;
 * } else if (res < -2^31) {
 *   res = 0x80000000; OV = 1;
 * }
 * Rd[31:0] = res[31:0]; // RV32
 * Rd[63:0] = SE(res[31:0]); // RV64
 * ~~~
 *
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KSLLW(long a, unsigned int b)
{
    long rd; /* write-only destination, operand %0 */

    /* a is the value operand (%1); b is handed over in a register (%2). */
    __ASM volatile(
        "ksllw %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.53. KSLLW ===== */

/* ===== Inline Function Start for 3.54. KSLLIW ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
 * \brief KSLLIW (Saturating Shift Left Logical Immediate for Word)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * KSLLIW Rd, Rs1, imm5u
 * ~~~
 *
 * **Purpose**:\n
 * Do logical left shift operation with saturation on a 32-bit word. The shift amount is an
 * immediate value.
 *
 * **Description**:\n
 * The first word data in Rs1 is left-shifted logically. The vacated low-order bits are filled with
 * zero and the shift amount is specified by the imm5u constant. Any shifted value greater than 2^31-1 is
 * saturated to 2^31-1. Any shifted value smaller than -2^31 is saturated to -2^31. And the saturated result is
 * sign-extended and written to Rd. If any saturation is performed, set OV bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * sa = imm5u;
 * res[(31+sa):0] = Rs1.W[0] << sa;
 * if (res > (2^31)-1) {
 *   res = 0x7fffffff; OV = 1;
 * } else if (res < -2^31) {
 *   res = 0x80000000; OV = 1;
 * }
 * Rd[31:0] = res[31:0]; // RV32
 * Rd[63:0] = SE(res[31:0]); // RV64
 * ~~~
 *
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    unsigned int shift amount; must be a constant expression, since it is encoded as the imm5u immediate
 * \return value stored in long type
 */
#define __RV_KSLLIW(a, b)    \
    ({    \
        /* Evaluate (a) before any other macro-local is declared, so that */    \
        /* an argument spelled `result` (or similar) still names the */    \
        /* caller's variable rather than an uninitialised temporary here. */    \
        long __rv_rs1 = (long)(a);    \
        /* Destination of the instruction (operand %0). */    \
        long __rv_rd;    \
        /* (b) uses the "K" constraint: it is emitted as the immediate */    \
        /* operand (imm5u in the documented syntax), not from a register. */    \
        __ASM volatile("kslliw %0, %1, %2" : "=r"(__rv_rd) : "r"(__rv_rs1), "K"(b));    \
        /* Value of the statement expression. */    \
        __rv_rd;    \
    })
/* ===== Inline Function End for 3.54. KSLLIW ===== */

/* ===== Inline Function Start for 3.55. KSLL8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
 * \brief KSLL8 (SIMD 8-bit Saturating Shift Left Logical)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KSLL8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit elements logical left shift operations with saturation simultaneously. The shift
 * amount is a variable from a GPR.
 *
 * **Description**:\n
 * The 8-bit data elements in Rs1 are left-shifted logically. The vacated low-order bits are filled
 * with zero and the shift amount is specified by the low-order 3-bits of the value in the Rs2 register.
 * Any shifted value greater than 2^7-1 is saturated to 2^7-1. Any shifted value smaller than -2^7 is
 * saturated to -2^7. And the saturated results are written to Rd. If any saturation is performed, set OV
 * bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * sa = Rs2[2:0];
 * if (sa != 0) {
 *   res[(7+sa):0] = Rs1.B[x] << sa;
 *   if (res > (2^7)-1) {
 *     res = 0x7f; OV = 1;
 *   } else if (res < -2^7) {
 *     res = 0x80; OV = 1;
 *   }
 *   Rd.B[x] = res[7:0];
 * } else {
 *   Rd = Rs1;
 * }
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KSLL8(unsigned long a, unsigned int b)
{
    unsigned long rd; /* write-only destination, operand %0 */

    /* a holds the packed elements (%1); b is passed in a register (%2). */
    __ASM volatile(
        "ksll8 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.55. KSLL8 ===== */

/* ===== Inline Function Start for 3.56. KSLLI8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
 * \brief KSLLI8 (SIMD 8-bit Saturating Shift Left Logical Immediate)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KSLLI8 Rd, Rs1, imm3u
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit elements logical left shift operations with saturation simultaneously. The shift
 * amount is an immediate value.
 *
 * **Description**:\n
 * The 8-bit data elements in Rs1 are left-shifted logically. The vacated low-order bits are filled
 * with zero and the shift amount is specified by the imm3u constant. Any shifted value greater than
 * 2^7-1 is saturated to 2^7-1. Any shifted value smaller than -2^7 is saturated to -2^7. And the saturated
 * results are written to Rd. If any saturation is performed, set OV bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * sa = imm3u[2:0];
 * if (sa != 0) {
 *   res[(7+sa):0] = Rs1.B[x] << sa;
 *   if (res > (2^7)-1) {
 *     res = 0x7f; OV = 1;
 *   } else if (res < -2^7) {
 *     res = 0x80; OV = 1;
 *   }
 *   Rd.B[x] = res[7:0];
 * } else {
 *   Rd = Rs1;
 * }
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int shift amount; must be a constant expression, since it is encoded as the imm3u immediate
 * \return value stored in unsigned long type
 */
#define __RV_KSLLI8(a, b)    \
    ({    \
        /* Evaluate (a) before any other macro-local is declared, so that */    \
        /* an argument spelled `result` (or similar) still names the */    \
        /* caller's variable rather than an uninitialised temporary here. */    \
        unsigned long __rv_rs1 = (unsigned long)(a);    \
        /* Destination of the instruction (operand %0). */    \
        unsigned long __rv_rd;    \
        /* (b) uses the "K" constraint: it is emitted as the immediate */    \
        /* operand (imm3u in the documented syntax), not from a register. */    \
        __ASM volatile("kslli8 %0, %1, %2" : "=r"(__rv_rd) : "r"(__rv_rs1), "K"(b));    \
        /* Value of the statement expression. */    \
        __rv_rd;    \
    })
/* ===== Inline Function End for 3.56. KSLLI8 ===== */

/* ===== Inline Function Start for 3.57. KSLL16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
 * \brief KSLL16 (SIMD 16-bit Saturating Shift Left Logical)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KSLL16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit elements logical left shift operations with saturation simultaneously. The shift
 * amount is a variable from a GPR.
 *
 * **Description**:\n
 * The 16-bit data elements in Rs1 are left-shifted logically. The vacated low-order bits are filled
 * with zero and the shift amount is specified by the low-order 4-bits of the value in the Rs2 register.
 * Any shifted value greater than 2^15-1 is saturated to 2^15-1. Any shifted value smaller than -2^15 is
 * saturated to -2^15. And the saturated results are written to Rd. If any saturation is performed, set OV
 * bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * sa = Rs2[3:0];
 * if (sa != 0) {
 *   res[(15+sa):0] = Rs1.H[x] << sa;
 *   if (res > (2^15)-1) {
 *     res = 0x7fff; OV = 1;
 *   } else if (res < -2^15) {
 *     res = 0x8000; OV = 1;
 *   }
 *   Rd.H[x] = res[15:0];
 * } else {
 *   Rd = Rs1;
 * }
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KSLL16(unsigned long a, unsigned int b)
{
    unsigned long rd; /* write-only destination, operand %0 */

    /* a holds the packed elements (%1); b is passed in a register (%2). */
    __ASM volatile(
        "ksll16 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.57. KSLL16 ===== */

/* ===== Inline Function Start for 3.58. KSLLI16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
 * \brief KSLLI16 (SIMD 16-bit Saturating Shift Left Logical Immediate)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KSLLI16 Rd, Rs1, imm4u
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit elements logical left shift operations with saturation simultaneously. The shift
 * amount is an immediate value.
 *
 * **Description**:\n
 * The 16-bit data elements in Rs1 are left-shifted logically. The vacated low-order bits are filled
 * with zero and the shift amount is specified by the imm4u constant. Any shifted value greater than
 * 2^15-1 is saturated to 2^15-1. Any shifted value smaller than -2^15 is saturated to -2^15. And the saturated
 * results are written to Rd. If any saturation is performed, set OV bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * sa = imm4u[3:0];
 * if (sa != 0) {
 *   res[(15+sa):0] = Rs1.H[x] << sa;
 *   if (res > (2^15)-1) {
 *     res = 0x7fff; OV = 1;
 *   } else if (res < -2^15) {
 *     res = 0x8000; OV = 1;
 *   }
 *   Rd.H[x] = res[15:0];
 * } else {
 *   Rd = Rs1;
 * }
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int shift amount; must be a constant expression, since it is encoded as the imm4u immediate
 * \return value stored in unsigned long type
 */
/*
 * Wrapper for the `kslli16` instruction with an immediate shift amount.
 *
 * a: value placed in a register operand (converted to unsigned long).
 * b: shift amount; it is bound with the "K" immediate constraint, so it has to
 *    be a compile-time constant (the instruction syntax documents it as imm4u).
 *
 * Evaluates to the unsigned long written by the instruction.
 *
 * The argument `a` is captured before any other local is declared, and the
 * locals carry macro-specific names, so a caller expression that mentions an
 * identifier such as `result` still refers to the caller's own variable rather
 * than to an uninitialized local of this statement expression.
 */
#define __RV_KSLLI16(a, b)    \
    ({    \
        unsigned long __kslli16_rs1 = (unsigned long)(a);    \
        unsigned long __kslli16_rd;    \
        __ASM volatile("kslli16 %0, %1, %2" : "=r"(__kslli16_rd) : "r"(__kslli16_rs1), "K"(b));    \
        __kslli16_rd;    \
    })
/* ===== Inline Function End for 3.58. KSLLI16 ===== */

/* ===== Inline Function Start for 3.59.1. KSLRA8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
 * \brief KSLRA8 (SIMD 8-bit Shift Left Logical with Saturation or Shift Right Arithmetic)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KSLRA8 Rd, Rs1, Rs2
 * KSLRA8.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit elements logical left (positive) or arithmetic right (negative) shift operation with
 * Q7 saturation for the left shift. The `.u` form performs additional rounding up operations for the
 * right shift.
 *
 * **Description**:\n
 * The 8-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
 * based on the value of Rs2[3:0]. Rs2[3:0] is in the signed range of [-2^3, 2^3-1]. A positive Rs2[3:0] means
 * logical left shift and a negative Rs2[3:0] means arithmetic right shift. The shift amount is the
 * absolute value of Rs2[3:0]. However, the behavior of `Rs2[3:0]==-2^3 (0x8)` is defined to be
 * equivalent to the behavior of `Rs2[3:0]==-(2^3-1) (0x9)`.
 * The left-shifted results are saturated to the 8-bit signed integer range of [-2^7, 2^7-1]. For the `.u` form
 * of the instruction, the right-shifted results are added a 1 to the most significant discarded bit
 * position for rounding effect. After the shift, saturation, or rounding, the final results are written to
 * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:4] will not affect
 * this instruction.
 *
 * **Operations**:\n
 * ~~~
 * if (Rs2[3:0] < 0) {
 *   sa = -Rs2[3:0];
 *   sa = (sa == 8)? 7 : sa;
 *   if (`.u` form) {
 *     res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1;
 *     Rd.B[x] = res[7:0];
 *   } else {
 *     Rd.B[x] = SE8(Rs1.B[x][7:sa]);
 *   }
 * } else {
 *   sa = Rs2[2:0];
 *   res[(7+sa):0] = Rs1.B[x] <<(logic) sa;
 *   if (res > (2^7)-1) {
 *     res[7:0] = 0x7f; OV = 1;
 *   } else if (res < -2^7) {
 *     res[7:0] = 0x80; OV = 1;
 *   }
 *   Rd.B[x] = res[7:0];
 * }
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KSLRA8(unsigned long a, int b)
{
    /* Holds the destination register value produced by the instruction. */
    unsigned long rd;

    /* Template operands: %0 = destination, %1 = a, %2 = b (signed shift control). */
    __ASM volatile("kslra8 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.59.1. KSLRA8 ===== */

/* ===== Inline Function Start for 3.59.2. KSLRA8.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
 * \brief KSLRA8.u (SIMD 8-bit Shift Left Logical with Saturation or Rounding Shift Right Arithmetic)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KSLRA8 Rd, Rs1, Rs2
 * KSLRA8.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit elements logical left (positive) or arithmetic right (negative) shift operation with
 * Q7 saturation for the left shift. The `.u` form performs additional rounding up operations for the
 * right shift.
 *
 * **Description**:\n
 * The 8-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
 * based on the value of Rs2[3:0]. Rs2[3:0] is in the signed range of [-2^3, 2^3-1]. A positive Rs2[3:0] means
 * logical left shift and a negative Rs2[3:0] means arithmetic right shift. The shift amount is the
 * absolute value of Rs2[3:0]. However, the behavior of `Rs2[3:0]==-2^3 (0x8)` is defined to be
 * equivalent to the behavior of `Rs2[3:0]==-(2^3-1) (0x9)`.
 * The left-shifted results are saturated to the 8-bit signed integer range of [-2^7, 2^7-1]. For the `.u` form
 * of the instruction, the right-shifted results are added a 1 to the most significant discarded bit
 * position for rounding effect. After the shift, saturation, or rounding, the final results are written to
 * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:4] will not affect
 * this instruction.
 *
 * **Operations**:\n
 * ~~~
 * if (Rs2[3:0] < 0) {
 *   sa = -Rs2[3:0];
 *   sa = (sa == 8)? 7 : sa;
 *   if (`.u` form) {
 *     res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1;
 *     Rd.B[x] = res[7:0];
 *   } else {
 *     Rd.B[x] = SE8(Rs1.B[x][7:sa]);
 *   }
 * } else {
 *   sa = Rs2[2:0];
 *   res[(7+sa):0] = Rs1.B[x] <<(logic) sa;
 *   if (res > (2^7)-1) {
 *     res[7:0] = 0x7f; OV = 1;
 *   } else if (res < -2^7) {
 *     res[7:0] = 0x80; OV = 1;
 *   }
 *   Rd.B[x] = res[7:0];
 * }
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KSLRA8_U(unsigned long a, int b)
{
    /* Holds the destination register value produced by the instruction. */
    unsigned long rd;

    /* `.u` variant of kslra8; operands: %0 = destination, %1 = a, %2 = b. */
    __ASM volatile("kslra8.u %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.59.2. KSLRA8.u ===== */

/* ===== Inline Function Start for 3.60.1. KSLRA16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
 * \brief KSLRA16 (SIMD 16-bit Shift Left Logical with Saturation or Shift Right Arithmetic)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KSLRA16 Rd, Rs1, Rs2
 * KSLRA16.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit elements logical left (positive) or arithmetic right (negative) shift operation with
 * Q15 saturation for the left shift. The `.u` form performs additional rounding up operations for the
 * right shift.
 *
 * **Description**:\n
 * The 16-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
 * based on the value of Rs2[4:0]. Rs2[4:0] is in the signed range of [-2^4, 2^4-1]. A positive Rs2[4:0] means
 * logical left shift and a negative Rs2[4:0] means arithmetic right shift. The shift amount is the
 * absolute value of Rs2[4:0]. However, the behavior of `Rs2[4:0]==-2^4 (0x10)` is defined to be
 * equivalent to the behavior of `Rs2[4:0]==-(2^4-1) (0x11)`.
 * The left-shifted results are saturated to the 16-bit signed integer range of [-2^15, 2^15-1]. For the `.u`
 * form of the instruction, the right-shifted results are added a 1 to the most significant discarded bit
 * position for rounding effect. After the shift, saturation, or rounding, the final results are written to
 * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:5] will not affect
 * this instruction.
 *
 * **Operations**:\n
 * ~~~
 * if (Rs2[4:0] < 0) {
 *   sa = -Rs2[4:0];
 *   sa = (sa == 16)? 15 : sa;
 *   if (`.u` form) {
 *     res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1;
 *     Rd.H[x] = res[15:0];
 *   } else {
 *     Rd.H[x] = SE16(Rs1.H[x][15:sa]);
 *   }
 * } else {
 *   sa = Rs2[3:0];
 *   res[(15+sa):0] = Rs1.H[x] <<(logic) sa;
 *   if (res > (2^15)-1) {
 *     res[15:0] = 0x7fff; OV = 1;
 *   } else if (res < -2^15) {
 *     res[15:0] = 0x8000; OV = 1;
 *   }
 *   Rd.H[x] = res[15:0];
 * }
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KSLRA16(unsigned long a, int b)
{
    /* Holds the destination register value produced by the instruction. */
    unsigned long rd;

    /* Template operands: %0 = destination, %1 = a, %2 = b (signed shift control). */
    __ASM volatile("kslra16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.60.1. KSLRA16 ===== */

/* ===== Inline Function Start for 3.60.2. KSLRA16.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
 * \brief KSLRA16.u (SIMD 16-bit Shift Left Logical with Saturation or Rounding Shift Right Arithmetic)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KSLRA16 Rd, Rs1, Rs2
 * KSLRA16.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit elements logical left (positive) or arithmetic right (negative) shift operation with
 * Q15 saturation for the left shift. The `.u` form performs additional rounding up operations for the
 * right shift.
 *
 * **Description**:\n
 * The 16-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
 * based on the value of Rs2[4:0]. Rs2[4:0] is in the signed range of [-2^4, 2^4-1]. A positive Rs2[4:0] means
 * logical left shift and a negative Rs2[4:0] means arithmetic right shift. The shift amount is the
 * absolute value of Rs2[4:0]. However, the behavior of `Rs2[4:0]==-2^4 (0x10)` is defined to be
 * equivalent to the behavior of `Rs2[4:0]==-(2^4-1) (0x11)`.
 * The left-shifted results are saturated to the 16-bit signed integer range of [-2^15, 2^15-1]. For the `.u`
 * form of the instruction, the right-shifted results are added a 1 to the most significant discarded bit
 * position for rounding effect. After the shift, saturation, or rounding, the final results are written to
 * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:5] will not affect
 * this instruction.
 *
 * **Operations**:\n
 * ~~~
 * if (Rs2[4:0] < 0) {
 *   sa = -Rs2[4:0];
 *   sa = (sa == 16)? 15 : sa;
 *   if (`.u` form) {
 *     res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1;
 *     Rd.H[x] = res[15:0];
 *   } else {
 *     Rd.H[x] = SE16(Rs1.H[x][15:sa]);
 *   }
 * } else {
 *   sa = Rs2[3:0];
 *   res[(15+sa):0] = Rs1.H[x] <<(logic) sa;
 *   if (res > (2^15)-1) {
 *     res[15:0] = 0x7fff; OV = 1;
 *   } else if (res < -2^15) {
 *     res[15:0] = 0x8000; OV = 1;
 *   }
 *   Rd.H[x] = res[15:0];
 * }
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KSLRA16_U(unsigned long a, int b)
{
    /* Holds the destination register value produced by the instruction. */
    unsigned long rd;

    /* `.u` variant of kslra16; operands: %0 = destination, %1 = a, %2 = b. */
    __ASM volatile("kslra16.u %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.60.2. KSLRA16.u ===== */

/* ===== Inline Function Start for 3.61. KSLRAW ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
 * \brief KSLRAW (Shift Left Logical with Q31 Saturation or Shift Right Arithmetic)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * KSLRAW Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Perform a logical left (positive) or arithmetic right (negative) shift operation with Q31
 * saturation for the left shift on a 32-bit data.
 *
 * **Description**:\n
 * The lower 32-bit content of Rs1 is left-shifted logically or right-shifted arithmetically
 * based on the value of Rs2[5:0]. Rs2[5:0] is in the signed range of [-2^5, 2^5-1]. A positive Rs2[5:0] means
 * logical left shift and a negative Rs2[5:0] means arithmetic right shift. The shift amount is the
 * absolute value of Rs2[5:0] clamped to the actual shift range of [0, 31].
 * The left-shifted result is saturated to the 32-bit signed integer range of [-2^31, 2^31-1]. After the shift
 * operation, the final result is bit-31 sign-extended and written to Rd. If any saturation happens, this
 * instruction sets the OV flag. The value of Rs2[31:6] will not affect the operation of this instruction.
 *
 * **Operations**:\n
 * ~~~
 * if (Rs2[5:0] < 0) {
 *   sa = -Rs2[5:0];
 *   sa = (sa == 32)? 31 : sa;
 *   res[31:0] = Rs1.W[0] >>(arith) sa;
 * } else {
 *   sa = Rs2[5:0];
 *   tmp = Rs1.W[0] <<(logic) sa;
 *   if (tmp > (2^31)-1) {
 *     res[31:0] = (2^31)-1;
 *     OV = 1;
 *   } else if (tmp < -2^31) {
 *     res[31:0] = -2^31;
 *     OV = 1;
 *   } else {
 *     res[31:0] = tmp[31:0];
 *   }
 * }
 * Rd = res[31:0]; // RV32
 * Rd = SE64(res[31:0]); // RV64
 * ~~~
 *
 * \param [in]  a    int type of value stored in a
 * \param [in]  b    int type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KSLRAW(int a, int b)
{
    /* Signed destination value written by the instruction. */
    long rd;

    /* Template operands: %0 = destination, %1 = a, %2 = b (signed shift control). */
    __ASM volatile("kslraw %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.61. KSLRAW ===== */

/* ===== Inline Function Start for 3.62. KSLRAW.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
 * \brief KSLRAW.u (Shift Left Logical with Q31 Saturation or Rounding Shift Right Arithmetic)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * KSLRAW.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Perform a logical left (positive) or arithmetic right (negative) shift operation with Q31
 * saturation for the left shift and a rounding up operation for the right shift on a 32-bit data.
 *
 * **Description**:\n
 * The lower 32-bit content of Rs1 is left-shifted logically or right-shifted arithmetically
 * based on the value of Rs2[5:0]. Rs2[5:0] is in the signed range of [-2^5, 2^5-1]. A positive Rs2[5:0] means
 * logical left shift and a negative Rs2[5:0] means arithmetic right shift. The shift amount is the
 * absolute value of Rs2[5:0] clamped to the actual shift range of [0, 31].
 * The left-shifted result is saturated to the 32-bit signed integer range of [-2^31, 2^31-1]. The right-shifted
 * result is added a 1 to the most significant discarded bit position for rounding effect. After the shift,
 * saturation, or rounding, the final result is bit-31 sign-extended and written to Rd. If any saturation
 * happens, this instruction sets the OV flag. The value of Rs2[31:6] will not affect the operation of this
 * instruction.
 *
 * **Operations**:\n
 * ~~~
 * if (Rs2[5:0] < 0) {
 *   sa = -Rs2[5:0];
 *   sa = (sa == 32)? 31 : sa;
 *   res[31:-1] = SE33(Rs1[31:(sa-1)]) + 1;
 *   rst[31:0] = res[31:0];
 * } else {
 *   sa = Rs2[5:0];
 *   tmp = Rs1.W[0] <<(logic) sa;
 *   if (tmp > (2^31)-1) {
 *     rst[31:0] = (2^31)-1;
 *     OV = 1;
 *   } else if (tmp < -2^31) {
 *     rst[31:0] = -2^31;
 *     OV = 1;
 *   } else {
 *     rst[31:0] = tmp[31:0];
 *   }
 * }
 * Rd = rst[31:0]; // RV32
 * Rd = SE64(rst[31:0]); // RV64
 * ~~~
 *
 * \param [in]  a    int type of value stored in a
 * \param [in]  b    int type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KSLRAW_U(int a, int b)
{
    /* Signed destination value written by the instruction. */
    long rd;

    /* `.u` variant of kslraw; operands: %0 = destination, %1 = a, %2 = b. */
    __ASM volatile("kslraw.u %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.62. KSLRAW.u ===== */

/* ===== Inline Function Start for 3.63. KSTAS16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief KSTAS16 (SIMD 16-bit Signed Saturating Straight Addition & Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KSTAS16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element saturating addition and 16-bit signed integer element
 * saturating subtraction in a 32-bit chunk simultaneously. Operands are from corresponding
 * positions in 32-bit chunks.
 *
 * **Description**:\n
 * This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in
 * Rs1 with the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs2; at the same time, it
 * subtracts the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs2 from the 16-bit signed
 * integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the Q15 number
 * range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated
 * results are written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit chunks in Rd for
 * subtraction.
 *
 * **Operations**:\n
 * ~~~
 * res1 = Rs1.W[x][31:16] + Rs2.W[x][31:16];
 * res2 = Rs1.W[x][15:0] - Rs2.W[x][15:0];
 * for (res in [res1, res2]) {
 *   if (res > (2^15)-1) {
 *     res = (2^15)-1;
 *     OV = 1;
 *   } else if (res < -2^15) {
 *     res = -2^15;
 *     OV = 1;
 *   }
 * }
 * Rd.W[x][31:16] = res1;
 * Rd.W[x][15:0] = res2;
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KSTAS16(unsigned long a, unsigned long b)
{
    /* Holds the destination register value produced by the instruction. */
    unsigned long rd;

    /* Template operands: %0 = destination, %1 = a, %2 = b. */
    __ASM volatile("kstas16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.63. KSTAS16 ===== */

/* ===== Inline Function Start for 3.64. KSTSA16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief KSTSA16 (SIMD 16-bit Signed Saturating Straight Subtraction & Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KSTSA16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element saturating subtraction and 16-bit signed integer element
 * saturating addition in a 32-bit chunk simultaneously. Operands are from corresponding positions in
 * 32-bit chunks.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit signed integer element in [31:16] of 32-bit chunks
 * in Rs2 from the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1; at the same time, it
 * adds the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs2 with the 16-bit signed integer
 * element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the Q15 number range (-2^15
 * <= Q15 <= 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated results are
 * written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of 32-bit chunks in Rd for
 * addition.
 *
 * **Operations**:\n
 * ~~~
 * res1 = Rs1.W[x][31:16] - Rs2.W[x][31:16];
 * res2 = Rs1.W[x][15:0] + Rs2.W[x][15:0];
 * for (res in [res1, res2]) {
 *   if (res > (2^15)-1) {
 *     res = (2^15)-1;
 *     OV = 1;
 *   } else if (res < -2^15) {
 *     res = -2^15;
 *     OV = 1;
 *   }
 * }
 * Rd.W[x][31:16] = res1;
 * Rd.W[x][15:0] = res2;
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KSTSA16(unsigned long a, unsigned long b)
{
    /* Holds the destination register value produced by the instruction. */
    unsigned long rd;

    /* Template operands: %0 = destination, %1 = a, %2 = b. */
    __ASM volatile("kstsa16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.64. KSTSA16 ===== */

/* ===== Inline Function Start for 3.65. KSUB8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
 * \brief KSUB8 (SIMD 8-bit Signed Saturating Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KSUB8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit signed elements saturating subtractions simultaneously.
 *
 * **Description**:\n
 * This instruction subtracts the 8-bit signed integer elements in Rs2 from the 8-bit
 * signed integer elements in Rs1. If any of the results are beyond the Q7 number range (-2^7 <= Q7 <=
 * 2^7-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rs1.B[x] - Rs2.B[x];
 * if (res[x] > (2^7)-1) {
 *   res[x] = (2^7)-1;
 *   OV = 1;
 * } else if (res[x] < -2^7) {
 *   res[x] = -2^7;
 *   OV = 1;
 * }
 * Rd.B[x] = res[x];
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KSUB8(unsigned long a, unsigned long b)
{
    /* Holds the destination register value produced by the instruction. */
    unsigned long rd;

    /* Template operands: %0 = destination, %1 = a, %2 = b. */
    __ASM volatile("ksub8 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.65. KSUB8 ===== */

/* ===== Inline Function Start for 3.66. KSUB16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief KSUB16 (SIMD 16-bit Signed Saturating Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KSUB16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer elements saturating subtractions simultaneously.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit signed integer elements in Rs2 from the 16-bit
 * signed integer elements in Rs1. If any of the results are beyond the Q15 number range (-2^15 <= Q15 <=
 * 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to
 * Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rs1.H[x] - Rs2.H[x];
 * if (res[x] > (2^15)-1) {
 *   res[x] = (2^15)-1;
 *   OV = 1;
 * } else if (res[x] < -2^15) {
 *   res[x] = -2^15;
 *   OV = 1;
 * }
 * Rd.H[x] = res[x];
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KSUB16(unsigned long a, unsigned long b)
{
    /* Holds the destination register value produced by the instruction. */
    unsigned long rd;

    /* Template operands: %0 = destination, %1 = a, %2 = b. */
    __ASM volatile("ksub16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.66. KSUB16 ===== */

/* ===== Inline Function Start for 3.67. KSUB64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
 * \brief KSUB64 (64-bit Signed Saturating Subtraction)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * KSUB64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Perform a 64-bit signed integer subtraction. The result is saturated to the Q63 range.
 *
 * **RV32 Description**:\n
 * This instruction subtracts the 64-bit signed integer of an even/odd pair of
 * registers specified by Rs2(4,1) from the 64-bit signed integer of an even/odd pair of registers
 * specified by Rs1(4,1). If the 64-bit result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is
 * saturated to the range and the OV bit is set to 1. The saturated result is then written to an even/odd
 * pair of registers specified by Rd(4,1).
 * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
 * register of the pair contains the low 32-bit of the operand.
 *
 * **RV64 Description**:\n
 * This instruction subtracts the 64-bit signed integer of Rs2 from the 64-bit signed
 * integer of Rs1. If the 64-bit result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated
 * to the range and the OV bit is set to 1. The saturated result is then written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * RV32:
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
 * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
 * result = R[a_H].R[a_L] - R[b_H].R[b_L];
 * if (result > (2^63)-1) {
 *   result = (2^63)-1; OV = 1;
 * } else if (result < -2^63) {
 *   result = -2^63; OV = 1;
 * }
 * R[t_H].R[t_L] = result;
 * RV64:
 * result = Rs1 - Rs2;
 * if (result > (2^63)-1) {
 *   result = (2^63)-1; OV = 1;
 * } else if (result < -2^63) {
 *   result = -2^63; OV = 1;
 * }
 * Rd = result;
 * ~~~
 *
 * \param [in]  a    long long type of value stored in a
 * \param [in]  b    long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_KSUB64(long long a, long long b)
{
    /* 64-bit destination value written by the instruction. */
    long long rd;

    /* Template operands: %0 = destination, %1 = a, %2 = b. */
    __ASM volatile("ksub64 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.67. KSUB64 ===== */

/* ===== Inline Function Start for 3.68. KSUBH ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
 * \brief KSUBH (Signed Subtraction with Q15 Saturation)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * KSUBH Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Subtract the signed lower 32-bit content of two registers with Q15 saturation.
 *
 * **Description**:\n
 * The signed lower 32-bit content of Rs2 is subtracted from the signed lower 32-bit
 * content of Rs1. And the result is saturated to the 16-bit signed integer range of [-2^15, 2^15-1] and then
 * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag.
 *
 * **Operations**:\n
 * ~~~
 * tmp = Rs1.W[0] - Rs2.W[0];
 * if (tmp > (2^15)-1) {
 *   res = (2^15)-1;
 *   OV = 1;
 * } else if (tmp < -2^15) {
 *   res = -2^15;
 *   OV = 1;
 * } else {
 *   res = tmp;
 * }
 * Rd = SE(res[15:0]);
 * ~~~
 *
 * \param [in]  a    int type of value stored in a
 * \param [in]  b    int type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KSUBH(int a, int b)
{
    /* Signed destination value written by the instruction. */
    long rd;

    /* Template operands: %0 = destination, %1 = a, %2 = b. */
    __ASM volatile("ksubh %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.68. KSUBH ===== */

/* ===== Inline Function Start for 3.69. KSUBW ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
 * \brief KSUBW (Signed Subtraction with Q31 Saturation)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * KSUBW Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Subtract the signed lower 32-bit content of two registers with Q31 saturation.
 *
 * **Description**:\n
 * The signed lower 32-bit content of Rs2 is subtracted from the signed lower 32-bit
 * content of Rs1. And the result is saturated to the 32-bit signed integer range of [-2^31, 2^31-1] and then
 * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag.
 *
 * **Operations**:\n
 * ~~~
 * tmp = Rs1.W[0] - Rs2.W[0];
 * if (tmp > (2^31)-1) {
 *   res = (2^31)-1;
 *   OV = 1;
 * } else if (tmp < -2^31) {
 *   res = -2^31;
 *   OV = 1;
 * } else {
 *   res = tmp;
 * }
 * Rd = res[31:0]; // RV32
 * Rd = SE(res[31:0]); // RV64
 * ~~~
 *
 * \param [in]  a    int type of value stored in a
 * \param [in]  b    int type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KSUBW(int a, int b)
{
    /* Signed destination value written by the instruction. */
    long rd;

    /* Template operands: %0 = destination, %1 = a, %2 = b. */
    __ASM volatile("ksubw %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.69. KSUBW ===== */

/* ===== Inline Function Start for 3.70.1. KWMMUL ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
 * \brief KWMMUL (SIMD Saturating MSW Signed Multiply Word & Double)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KWMMUL Rd, Rs1, Rs2
 * KWMMUL.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit integer elements of two registers, shift the results left 1-bit,
 * saturate, and write the most significant 32-bit results to a register. The `.u` form additionally
 * rounds up the multiplication results from the most significant discarded bit.
 *
 * **Description**:\n
 * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2. It then shifts
 * the multiplication results one bit to the left and takes the most significant 32-bit results. If the
 * shifted result is greater than 2^31-1, it is saturated to 2^31-1 and the OV flag is set to 1. The final element
 * result is written to Rd. The 32-bit elements of Rs1 and Rs2 are treated as signed integers. The `.u`
 * form of the instruction additionally rounds up the 64-bit multiplication results by adding a 1 to bit
 * 30 before the shift and saturation operations.
 *
 * **Operations**:\n
 * ~~~
 * if ((0x80000000 != Rs1.W[x]) | (0x80000000 != Rs2.W[x])) {
 *   Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
 *   if (`.u` form) {
 *     Round[x][33:0] = Mres[x][63:30] + 1;
 *     Rd.W[x] = Round[x][32:1];
 *   } else {
 *     Rd.W[x] = Mres[x][62:31];
 *   }
 * } else {
 *   Rd.W[x] = 0x7fffffff;
 *   OV = 1;
 * }
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KWMMUL(long a, long b)
{
    /* Signed destination value written by the instruction. */
    long rd;

    /* Template operands: %0 = destination, %1 = a, %2 = b. */
    __ASM volatile("kwmmul %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.70.1. KWMMUL ===== */

/* ===== Inline Function Start for 3.70.2. KWMMUL.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
 * \brief KWMMUL.u (SIMD Saturating MSW Signed Multiply Word & Double with Rounding)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * KWMMUL Rd, Rs1, Rs2
 * KWMMUL.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit integer elements of two registers, shift the results left 1-bit,
 * saturate, and write the most significant 32-bit results to a register. The `.u` form additionally
 * rounds up the multiplication results from the most significant discarded bit.
 *
 * **Description**:\n
 * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2. It then shifts
 * the multiplication results one bit to the left and takes the most significant 32-bit results. If the
 * shifted result is greater than 2^31-1, it is saturated to 2^31-1 and the OV flag is set to 1. The final element
 * result is written to Rd. The 32-bit elements of Rs1 and Rs2 are treated as signed integers. The `.u`
 * form of the instruction additionally rounds up the 64-bit multiplication results by adding a 1 to bit
 * 30 before the shift and saturation operations.
 *
 * **Operations**:\n
 * ~~~
 * if ((0x80000000 != Rs1.W[x]) | (0x80000000 != Rs2.W[x])) {
 *   Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
 *   if (`.u` form) {
 *     Round[x][33:0] = Mres[x][63:30] + 1;
 *     Rd.W[x] = Round[x][32:1];
 *   } else {
 *     Rd.W[x] = Mres[x][62:31];
 *   }
 * } else {
 *   Rd.W[x] = 0x7fffffff;
 *   OV = 1;
 * }
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KWMMUL_U(long a, long b)
{
    /* Signed destination value written by the instruction. */
    long rd;

    /* `.u` variant of kwmmul; operands: %0 = destination, %1 = a, %2 = b. */
    __ASM volatile("kwmmul.u %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.70.2. KWMMUL.u ===== */

/* ===== Inline Function Start for 3.71. MADDR32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
 * \brief MADDR32 (Multiply and Add to 32-Bit Word)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * MADDR32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the 32-bit contents of two registers and add the lower 32-bit multiplication result
 * to the 32-bit content of a destination register. Write the final result back to the destination register.
 *
 * **Description**:\n
 * This instruction multiplies the lower 32-bit content of Rs1 with that of Rs2. It adds the
 * lower 32-bit multiplication result to the lower 32-bit content of Rd and writes the final result (RV32)
 * or sign-extended result (RV64) back to Rd. The contents of Rs1 and Rs2 can be either signed or
 * unsigned integers.
 *
 * **Operations**:\n
 * ~~~
 * RV32:
 * Mresult = Rs1 * Rs2;
 * Rd = Rd + Mresult.W[0];
 * RV64:
 * Mresult = Rs1.W[0] * Rs2.W[0];
 * tres[31:0] = Rd.W[0] + Mresult.W[0];
 * Rd = SE64(tres[31:0]);
 * ~~~
 *
 * \param [in]  t    unsigned long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_MADDR32(unsigned long t, unsigned long a, unsigned long b)
{
    /*
     * %0 is bound to t as a read-write ("+r") operand: t is supplied to the
     * instruction and is overwritten with what the instruction leaves in
     * that register. %1 = a, %2 = b.
     */
    __ASM volatile("maddr32 %0, %1, %2"
                   : "+r"(t)
                   : "r"(a), "r"(b));

    return t;
}
/* ===== Inline Function End for 3.71. MADDR32 ===== */

/* ===== Inline Function Start for 3.72. MAXW ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
 * \brief MAXW (32-bit Signed Word Maximum)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * MAXW Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Get the larger value from the 32-bit contents of two general registers.
 *
 * **Description**:\n
 * This instruction compares two signed 32-bit integers stored in Rs1 and Rs2, picks the
 * larger value as the result, and writes the result to Rd.
 *
 * **Operations**:\n
 * ~~~
 * if (Rs1.W[0] >= Rs2.W[0]) {
 *   Rd = SE(Rs1.W[0]);
 * } else {
 *   Rd = SE(Rs2.W[0]);
 * }
 * ~~~
 *
 * \param [in]  a    int type of value stored in a
 * \param [in]  b    int type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_MAXW(int a, int b)
{
    long rd; /* output register operand of maxw */

    /* Single maxw instruction: a and b are the int inputs, the destination
     * register is handed back as long. */
    __ASM volatile(
        "maxw %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.72. MAXW ===== */

/* ===== Inline Function Start for 3.73. MINW ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
 * \brief MINW (32-bit Signed Word Minimum)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * MINW Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Get the smaller value from the 32-bit contents of two general registers.
 *
 * **Description**:\n
 * This instruction compares two signed 32-bit integers stored in Rs1 and Rs2, picks the
 * smaller value as the result, and writes the result to Rd.
 *
 * **Operations**:\n
 * ~~~
 * if (Rs1.W[0] >= Rs2.W[0]) { Rd = SE(Rs2.W[0]); } else { Rd = SE(Rs1.W[0]); }
 * ~~~
 *
 * \param [in]  a    int type of value stored in a
 * \param [in]  b    int type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_MINW(int a, int b)
{
    long rd; /* output register operand of minw */

    /* Single minw instruction: a and b are the int inputs, the destination
     * register is handed back as long. */
    __ASM volatile(
        "minw %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.73. MINW ===== */

/* ===== Inline Function Start for 3.74. MSUBR32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
 * \brief MSUBR32 (Multiply and Subtract from 32-Bit Word)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * MSUBR32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the 32-bit contents of two registers and subtract the lower 32-bit multiplication
 * result from the 32-bit content of a destination register. Write the final result back to the destination
 * register.
 *
 * **Description**:\n
 * This instruction multiplies the lower 32-bit content of Rs1 with that of Rs2, subtracts
 * the lower 32-bit multiplication result from the lower 32-bit content of Rd, then writes the final
 * result (RV32) or sign-extended result (RV64) back to Rd. The contents of Rs1 and Rs2 can be either
 * signed or unsigned integers.
 *
 * **Operations**:\n
 * ~~~
 * RV32:
 * Mresult = Rs1 * Rs2;
 * Rd = Rd - Mresult.W[0];
 * RV64:
 * Mresult = Rs1.W[0] * Rs2.W[0];
 * tres[31:0] = Rd.W[0] - Mresult.W[0];
 * Rd = SE64(tres[31:0]);
 * ~~~
 *
 * \param [in]  t    unsigned long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_MSUBR32(unsigned long t, unsigned long a, unsigned long b)
{
    /* t is a read-write operand ("+r"): it supplies the value subtracted
     * from going in and holds the instruction's destination value coming out. */
    __ASM volatile(
        "msubr32 %0, %1, %2"
        : "+r"(t)
        : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for 3.74. MSUBR32 ===== */

/* ===== Inline Function Start for 3.75. MULR64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
 * \brief MULR64 (Multiply Word Unsigned to 64-bit Data)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * MULR64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the 32-bit unsigned integer contents of two registers and write the 64-bit result.
 *
 * **RV32 Description**:\n
 * This instruction multiplies the 32-bit content of Rs1 with that of Rs2 and writes the 64-bit
 * multiplication result to an even/odd pair of registers containing Rd. Rd(4,1) index d determines the
 * even/odd pair group of the two registers. Specifically, the register pair includes register 2d and
 * 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
 * of the pair contains the low 32-bit of the result.
 * The lower 32-bit contents of Rs1 and Rs2 are treated as unsigned integers.
 *
 * **RV64 Description**:\n
 * This instruction multiplies the lower 32-bit content of Rs1 with that of Rs2 and writes the 64-bit
 * multiplication result to Rd.
 * The lower 32-bit contents of Rs1 and Rs2 are treated as unsigned integers.
 *
 * **Operations**:\n
 * ~~~
 * RV32:
 * Mresult = CONCAT(1'b0,Rs1) u* CONCAT(1'b0,Rs2);
 * R[Rd(4,1).1(0)][31:0] = Mresult[63:32];
 * R[Rd(4,1).0(0)][31:0] = Mresult[31:0];
 * RV64:
 * Mresult = CONCAT(1'b0,Rs1.W[0]) u* CONCAT(1'b0,Rs2.W[0]);
 * Rd = Mresult[63:0];
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_MULR64(unsigned long a, unsigned long b)
{
    unsigned long long rd; /* 64-bit output operand of mulr64 */

    /* Single mulr64 instruction: %1 = a, %2 = b, %0 = returned value. */
    __ASM volatile(
        "mulr64 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.75. MULR64 ===== */

/* ===== Inline Function Start for 3.76. MULSR64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
 * \brief MULSR64 (Multiply Word Signed to 64-bit Data)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * MULSR64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the 32-bit signed integer contents of two registers and write the 64-bit result.
 *
 * **RV32 Description**:\n
 * This instruction multiplies the lower 32-bit content of Rs1 with the lower 32-bit content of Rs2 and
 * writes the 64-bit multiplication result to an even/odd pair of registers containing Rd. Rd(4,1) index d
 * determines the even/odd pair group of the two registers. Specifically, the register pair includes
 * register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
 * of the pair contains the low 32-bit of the result.
 * The lower 32-bit contents of Rs1 and Rs2 are treated as signed integers.
 *
 * **RV64 Description**:\n
 * This instruction multiplies the lower 32-bit content of Rs1 with the lower 32-bit content of Rs2 and
 * writes the 64-bit multiplication result to Rd.
 * The lower 32-bit contents of Rs1 and Rs2 are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * RV32:
 * Mresult = Rs1 s* Rs2;
 * R[Rd(4,1).1(0)][31:0] = Mresult[63:32];
 * R[Rd(4,1).0(0)][31:0] = Mresult[31:0];
 * RV64:
 * Mresult = Rs1.W[0] s* Rs2.W[0];
 * Rd = Mresult[63:0];
 * ~~~
 *
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_MULSR64(long a, long b)
{
    long long rd; /* 64-bit output operand of mulsr64 */

    /* Single mulsr64 instruction: %1 = a, %2 = b, %0 = returned value. */
    __ASM volatile(
        "mulsr64 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.76. MULSR64 ===== */

/* ===== Inline Function Start for 3.77. PBSAD ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
 * \brief PBSAD (Parallel Byte Sum of Absolute Difference)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * PBSAD Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Calculate the sum of absolute difference of unsigned 8-bit data elements.
 *
 * **Description**:\n
 * This instruction subtracts the unsigned 8-bit elements of Rs2 from those of Rs1. Then
 * it adds the absolute value of each difference together and writes the result to Rd.
 *
 * **Operations**:\n
 * ~~~
 * absdiff[x] = ABS(Rs1.B[x] - Rs2.B[x]);
 * Rd = SUM(absdiff[x]);
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_PBSAD(unsigned long a, unsigned long b)
{
    unsigned long rd; /* output register operand of pbsad */

    /* Single pbsad instruction: %1 = a, %2 = b, %0 = returned value. */
    __ASM volatile(
        "pbsad %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.77. PBSAD ===== */

/* ===== Inline Function Start for 3.78. PBSADA ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
 * \brief PBSADA (Parallel Byte Sum of Absolute Difference Accum)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * PBSADA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Calculate the sum of absolute difference of unsigned 8-bit data elements (four on RV32,
 * eight on RV64) and accumulate it into a register.
 *
 * **Description**:\n
 * This instruction subtracts the unsigned 8-bit elements of Rs2 from those of Rs1. It
 * then adds the absolute value of each difference together along with the content of Rd and writes the
 * accumulated result back to Rd.
 *
 * **Operations**:\n
 * ~~~
 * absdiff[x] = ABS(Rs1.B[x] - Rs2.B[x]);
 * Rd = Rd + SUM(absdiff[x]);
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  t    unsigned long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_PBSADA(unsigned long t, unsigned long a, unsigned long b)
{
    /* t is a read-write operand ("+r"): it supplies the running accumulator
     * going in and holds the instruction's destination value coming out. */
    __ASM volatile(
        "pbsada %0, %1, %2"
        : "+r"(t)
        : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for 3.78. PBSADA ===== */

/* ===== Inline Function Start for 3.79.1. PKBB16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_PACK
 * \brief PKBB16 (Pack Two 16-bit Data from Both Bottom Half)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * PKBB16 Rd, Rs1, Rs2
 * PKBT16 Rd, Rs1, Rs2
 * PKTT16 Rd, Rs1, Rs2
 * PKTB16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Pack 16-bit data from 32-bit chunks in two registers.
 * * PKBB16: bottom.bottom
 * * PKBT16 bottom.top
 * * PKTT16 top.top
 * * PKTB16 top.bottom
 *
 * **Description**:\n
 * (PKBB16) moves Rs1.W[x][15:0] to Rd.W[x][31:16] and moves Rs2.W[x] [15:0] to
 * Rd.W[x] [15:0].
 * (PKBT16) moves Rs1.W[x] [15:0] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
 * (PKTT16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
 * (PKTB16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0].
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][15:0]); // PKBB16
 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][31:16]); // PKBT16
 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][15:0]); // PKTB16
 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][31:16]); // PKTT16
 * for RV32: x=0,
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_PKBB16(unsigned long a, unsigned long b)
{
    unsigned long rd; /* output register operand of pkbb16 */

    /* Single pkbb16 instruction: %1 = a, %2 = b, %0 = returned value. */
    __ASM volatile(
        "pkbb16 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.79.1. PKBB16 ===== */

/* ===== Inline Function Start for 3.79.2. PKBT16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_PACK
 * \brief PKBT16 (Pack Two 16-bit Data from Bottom and Top Half)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * PKBB16 Rd, Rs1, Rs2
 * PKBT16 Rd, Rs1, Rs2
 * PKTT16 Rd, Rs1, Rs2
 * PKTB16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Pack 16-bit data from 32-bit chunks in two registers.
 * * PKBB16: bottom.bottom
 * * PKBT16 bottom.top
 * * PKTT16 top.top
 * * PKTB16 top.bottom
 *
 * **Description**:\n
 * (PKBB16) moves Rs1.W[x][15:0] to Rd.W[x][31:16] and moves Rs2.W[x] [15:0] to
 * Rd.W[x] [15:0].
 * (PKBT16) moves Rs1.W[x] [15:0] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
 * (PKTT16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
 * (PKTB16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0].
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][15:0]); // PKBB16
 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][31:16]); // PKBT16
 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][15:0]); // PKTB16
 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][31:16]); // PKTT16
 * for RV32: x=0,
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_PKBT16(unsigned long a, unsigned long b)
{
    unsigned long rd; /* output register operand of pkbt16 */

    /* Single pkbt16 instruction: %1 = a, %2 = b, %0 = returned value. */
    __ASM volatile(
        "pkbt16 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.79.2. PKBT16 ===== */

/* ===== Inline Function Start for 3.79.3. PKTT16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_PACK
 * \brief PKTT16 (Pack Two 16-bit Data from Both Top Half)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * PKBB16 Rd, Rs1, Rs2
 * PKBT16 Rd, Rs1, Rs2
 * PKTT16 Rd, Rs1, Rs2
 * PKTB16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Pack 16-bit data from 32-bit chunks in two registers.
 * * PKBB16: bottom.bottom
 * * PKBT16 bottom.top
 * * PKTT16 top.top
 * * PKTB16 top.bottom
 *
 * **Description**:\n
 * (PKBB16) moves Rs1.W[x][15:0] to Rd.W[x][31:16] and moves Rs2.W[x] [15:0] to
 * Rd.W[x] [15:0].
 * (PKBT16) moves Rs1.W[x] [15:0] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
 * (PKTT16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
 * (PKTB16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0].
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][15:0]); // PKBB16
 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][31:16]); // PKBT16
 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][15:0]); // PKTB16
 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][31:16]); // PKTT16
 * for RV32: x=0,
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_PKTT16(unsigned long a, unsigned long b)
{
    unsigned long rd; /* output register operand of pktt16 */

    /* Single pktt16 instruction: %1 = a, %2 = b, %0 = returned value. */
    __ASM volatile(
        "pktt16 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.79.3. PKTT16 ===== */

/* ===== Inline Function Start for 3.79.4. PKTB16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_PACK
 * \brief PKTB16 (Pack Two 16-bit Data from Top and Bottom Half)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * PKBB16 Rd, Rs1, Rs2
 * PKBT16 Rd, Rs1, Rs2
 * PKTT16 Rd, Rs1, Rs2
 * PKTB16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Pack 16-bit data from 32-bit chunks in two registers.
 * * PKBB16: bottom.bottom
 * * PKBT16 bottom.top
 * * PKTT16 top.top
 * * PKTB16 top.bottom
 *
 * **Description**:\n
 * (PKBB16) moves Rs1.W[x][15:0] to Rd.W[x][31:16] and moves Rs2.W[x] [15:0] to
 * Rd.W[x] [15:0].
 * (PKBT16) moves Rs1.W[x] [15:0] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
 * (PKTT16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
 * (PKTB16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0].
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][15:0]); // PKBB16
 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][31:16]); // PKBT16
 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][15:0]); // PKTB16
 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][31:16]); // PKTT16
 * for RV32: x=0,
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_PKTB16(unsigned long a, unsigned long b)
{
    unsigned long rd; /* output register operand of pktb16 */

    /* Single pktb16 instruction: %1 = a, %2 = b, %0 = returned value. */
    __ASM volatile(
        "pktb16 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.79.4. PKTB16 ===== */

/* ===== Inline Function Start for 3.80. RADD8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
 * \brief RADD8 (SIMD 8-bit Signed Halving Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * RADD8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit signed integer element additions simultaneously. The element results are halved
 * to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the 8-bit signed integer elements in Rs1 with the 8-bit signed
 * integer elements in Rs2. The results are first arithmetically right-shifted by 1 bit and then written to
 * Rd.
 *
 * **Examples**:\n
 * ~~~
 * * Rs1 = 0x7F, Rs2 = 0x7F, Rd = 0x7F
 * * Rs1 = 0x80, Rs2 = 0x80, Rd = 0x80
 * * Rs1 = 0x40, Rs2 = 0x80, Rd = 0xE0
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.B[x] = (Rs1.B[x] + Rs2.B[x]) s>> 1; for RV32: x=3...0, for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_RADD8(unsigned long a, unsigned long b)
{
    unsigned long rd; /* output register operand of radd8 */

    /* Single radd8 instruction: %1 = a, %2 = b, %0 = returned value. */
    __ASM volatile(
        "radd8 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.80. RADD8 ===== */

/* ===== Inline Function Start for 3.81. RADD16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief RADD16 (SIMD 16-bit Signed Halving Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * RADD16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element additions simultaneously. The results are halved to avoid
 * overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the 16-bit signed integer elements in Rs1 with the 16-bit signed
 * integer elements in Rs2. The results are first arithmetically right-shifted by 1 bit and then written to
 * Rd.
 *
 * **Examples**:\n
 * ~~~
 * * Rs1 = 0x7FFF, Rs2 = 0x7FFF, Rd = 0x7FFF
 * * Rs1 = 0x8000, Rs2 = 0x8000, Rd = 0x8000
 * * Rs1 = 0x4000, Rs2 = 0x8000, Rd = 0xE000
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = (Rs1.H[x] + Rs2.H[x]) s>> 1; for RV32: x=1...0, for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_RADD16(unsigned long a, unsigned long b)
{
    unsigned long rd; /* output register operand of radd16 */

    /* Single radd16 instruction: %1 = a, %2 = b, %0 = returned value. */
    __ASM volatile(
        "radd16 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.81. RADD16 ===== */

/* ===== Inline Function Start for 3.82. RADD64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
 * \brief RADD64 (64-bit Signed Halving Addition)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * RADD64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Add two 64-bit signed integers. The result is halved to avoid overflow or saturation.
 *
 * **RV32 Description**:\n
 * This instruction adds the 64-bit signed integer of an even/odd pair of registers
 * specified by Rs1(4,1) with the 64-bit signed integer of an even/odd pair of registers specified by
 * Rs2(4,1). The 64-bit addition result is first arithmetically right-shifted by 1 bit and then written to an
 * even/odd pair of registers specified by Rd(4,1).
 * Rx(4,1), i.e., value d, determines the even/odd pair group of two registers. Specifically, the register
 * pair includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
 * of the pair contains the low 32-bit of the result.
 *
 * **RV64 Description**:\n
 * This instruction adds the 64-bit signed integer in Rs1 with the 64-bit signed
 * integer in Rs2. The 64-bit addition result is first arithmetically right-shifted by 1 bit and then
 * written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * RV32:
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
 * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
 * R[t_H].R[t_L] = (R[a_H].R[a_L] + R[b_H].R[b_L]) s>> 1;
 * RV64:
 * Rd = (Rs1 + Rs2) s>> 1;
 * ~~~
 *
 * \param [in]  a    long long type of value stored in a
 * \param [in]  b    long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_RADD64(long long a, long long b)
{
    long long rd; /* 64-bit output operand of radd64 */

    /* Single radd64 instruction: both inputs and the output are long long
     * values bound to "r" operands. */
    __ASM volatile(
        "radd64 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.82. RADD64 ===== */

/* ===== Inline Function Start for 3.83. RADDW ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
 * \brief RADDW (32-bit Signed Halving Addition)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * RADDW Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Add 32-bit signed integers and the results are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the first 32-bit signed integer in Rs1 with the first 32-bit signed
 * integer in Rs2. The result is first arithmetically right-shifted by 1 bit and then sign-extended and
 * written to Rd.
 *
 * **Examples**:\n
 * ~~~
 * * Rs1 = 0x7FFFFFFF, Rs2 = 0x7FFFFFFF, Rd = 0x7FFFFFFF
 * * Rs1 = 0x80000000, Rs2 = 0x80000000, Rd = 0x80000000
 * * Rs1 = 0x40000000, Rs2 = 0x80000000, Rd = 0xE0000000
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * RV32:
 * Rd[31:0] = (Rs1[31:0] + Rs2[31:0]) s>> 1;
 * RV64:
 * resw[31:0] = (Rs1[31:0] + Rs2[31:0]) s>> 1;
 * Rd[63:0] = SE(resw[31:0]);
 * ~~~
 *
 * \param [in]  a    int type of value stored in a
 * \param [in]  b    int type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_RADDW(int a, int b)
{
    long rd; /* output register operand of raddw */

    /* Single raddw instruction: a and b are the int inputs, the destination
     * register is handed back as long. */
    __ASM volatile(
        "raddw %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.83. RADDW ===== */

/* ===== Inline Function Start for 3.84. RCRAS16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief RCRAS16 (SIMD 16-bit Signed Halving Cross Addition & Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * RCRAS16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element addition and 16-bit signed integer element subtraction in
 * a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit chunks. The results
 * are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in
 * Rs1 with the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs2, and subtracts the 16-bit
 * signed integer element in [31:16] of 32-bit chunks in Rs2 from the 16-bit signed integer element in
 * [15:0] of 32-bit chunks in Rs1. The element results are first arithmetically right-shifted by 1 bit and
 * then written to [31:16] of 32-bit chunks in Rd and [15:0] of 32-bit chunks in Rd.
 *
 * **Examples**:\n
 * ~~~
 * Please see `RADD16` and `RSUB16` instructions.
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:16] = (Rs1.W[x][31:16] + Rs2.W[x][15:0]) s>> 1;
 * Rd.W[x][15:0] = (Rs1.W[x][15:0] - Rs2.W[x][31:16]) s>> 1;
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_RCRAS16(unsigned long a, unsigned long b)
{
    unsigned long rd; /* output register operand of rcras16 */

    /* Single rcras16 instruction: %1 = a, %2 = b, %0 = returned value. */
    __ASM volatile(
        "rcras16 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.84. RCRAS16 ===== */

/* ===== Inline Function Start for 3.85. RCRSA16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief RCRSA16 (SIMD 16-bit Signed Halving Cross Subtraction & Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * RCRSA16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element subtraction and 16-bit signed integer element addition in
 * a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit chunks. The results
 * are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit signed integer element in [15:0] of 32-bit chunks
 * in Rs2 from the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1, and adds the 16-bit
 * signed element integer in [15:0] of 32-bit chunks in Rs1 with the 16-bit signed integer element in
 * [31:16] of 32-bit chunks in Rs2. The two results are first arithmetically right-shifted by 1 bit and
 * then written to [31:16] of 32-bit chunks in Rd and [15:0] of 32-bit chunks in Rd.
 *
 * **Examples**:\n
 * ~~~
 * Please see `RADD16` and `RSUB16` instructions.
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:16] = (Rs1.W[x][31:16] - Rs2.W[x][15:0]) s>> 1;
 * Rd.W[x][15:0] = (Rs1.W[x][15:0] + Rs2.W[x][31:16]) s>> 1;
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_RCRSA16(unsigned long a, unsigned long b)
{
    unsigned long rd; /* output register operand of rcrsa16 */

    /* Single rcrsa16 instruction: %1 = a, %2 = b, %0 = returned value. */
    __ASM volatile(
        "rcrsa16 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.85. RCRSA16 ===== */

/* ===== Inline Function Start for 3.86. RDOV ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_OV_FLAG_SC
 * \brief RDOV (Read OV flag)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * RDOV Rd  # pseudo mnemonic
 * ~~~
 *
 * **Purpose**:\n
 * This pseudo instruction is an alias to `CSRR Rd, ucode` instruction which maps to the real
 * instruction of `CSRRS Rd, ucode, x0`.
 *
 *
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_RDOV(void)
{
    unsigned long rd; /* output register operand of rdov */

    /* rdov takes no input operands; only the destination register is bound. */
    __ASM volatile(
        "rdov %0"
        : "=r"(rd));
    return rd;
}
/* ===== Inline Function End for 3.86. RDOV ===== */

/* ===== Inline Function Start for 3.87. RSTAS16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief RSTAS16 (SIMD 16-bit Signed Halving Straight Addition & Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * RSTAS16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element addition and 16-bit signed integer element subtraction in
 * a 32-bit chunk simultaneously. Operands are from corresponding positions in 32-bit chunks. The
 * results are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in
 * Rs1 with the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs2, and subtracts the 16-bit
 * signed integer element in [15:0] of 32-bit chunks in Rs2 from the 16-bit signed integer element in
 * [15:0] of 32-bit chunks in Rs1. The element results are first arithmetically right-shifted by 1 bit and
 * then written to [31:16] of 32-bit chunks in Rd and [15:0] of 32-bit chunks in Rd.
 *
 * **Examples**:\n
 * ~~~
 * Please see `RADD16` and `RSUB16` instructions.
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:16] = (Rs1.W[x][31:16] + Rs2.W[x][31:16]) s>> 1;
 * Rd.W[x][15:0] = (Rs1.W[x][15:0] - Rs2.W[x][15:0]) s>> 1;
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_RSTAS16(unsigned long a, unsigned long b)
{
    unsigned long rd; /* output register operand of rstas16 */

    /* Single rstas16 instruction: %1 = a, %2 = b, %0 = returned value. */
    __ASM volatile(
        "rstas16 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.87. RSTAS16 ===== */

/* ===== Inline Function Start for 3.88. RSTSA16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief RSTSA16 (SIMD 16-bit Signed Halving Straight Subtraction & Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * RSTSA16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element subtraction and 16-bit signed integer element addition in
 * a 32-bit chunk simultaneously. Operands are from corresponding positions in 32-bit chunks. The
 * results are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit signed integer element in [31:16] of 32-bit chunks
 * in Rs2 from the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1, and adds the 16-bit
 * signed element integer in [15:0] of 32-bit chunks in Rs1 with the 16-bit signed integer element in
 * [15:0] of 32-bit chunks in Rs2. The two results are first arithmetically right-shifted by 1 bit and then
 * written to [31:16] of 32-bit chunks in Rd and [15:0] of 32-bit chunks in Rd.
 *
 * **Examples**:\n
 * ~~~
 * Please see `RADD16` and `RSUB16` instructions.
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:16] = (Rs1.W[x][31:16] - Rs2.W[x][31:16]) s>> 1;
 * Rd.W[x][15:0] = (Rs1.W[x][15:0] + Rs2.W[x][15:0]) s>> 1;
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_RSTSA16(unsigned long a, unsigned long b)
{
    unsigned long rd; /* output register operand of rstsa16 */

    /* Single rstsa16 instruction: %1 = a, %2 = b, %0 = returned value. */
    __ASM volatile(
        "rstsa16 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.88. RSTSA16 ===== */

/* ===== Inline Function Start for 3.89. RSUB8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
 * \brief RSUB8 (SIMD 8-bit Signed Halving Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * RSUB8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit signed integer element subtractions simultaneously. The results are halved to
 * avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the 8-bit signed integer elements in Rs2 from the 8-bit
 * signed integer elements in Rs1. The results are first arithmetically right-shifted by 1 bit and then
 * written to Rd.
 *
 * **Examples**:\n
 * ~~~
 * * Rs1 = 0x7F, Rs2 = 0x80, Rd = 0x7F
 * * Rs1 = 0x80, Rs2 = 0x7F, Rd = 0x80
 * * Rs1 = 0x80, Rs2 = 0x40, Rd = 0xA0
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.B[x] = (Rs1.B[x] - Rs2.B[x]) s>> 1;
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_RSUB8(unsigned long a, unsigned long b)
{
    unsigned long rd; /* output register operand of rsub8 */

    /* Single rsub8 instruction: %1 = a, %2 = b, %0 = returned value. */
    __ASM volatile(
        "rsub8 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.89. RSUB8 ===== */

/* ===== Inline Function Start for 3.90. RSUB16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief RSUB16 (SIMD 16-bit Signed Halving Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * RSUB16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element subtractions simultaneously. The results are halved to
 * avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit signed integer elements in Rs2 from the 16-bit
 * signed integer elements in Rs1. The results are first arithmetically right-shifted by 1 bit and then
 * written to Rd.
 *
 * **Examples**:\n
 * ~~~
 * * Rs1 = 0x7FFF, Rs2 = 0x8000, Rd = 0x7FFF
 * * Rs1 = 0x8000, Rs2 = 0x7FFF, Rd = 0x8000
 * * Rs1 = 0x8000, Rs2 = 0x4000, Rd = 0xA000
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = (Rs1.H[x] - Rs2.H[x]) s>> 1;
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_RSUB16(unsigned long a, unsigned long b)
{
    unsigned long halved;

    /* Operand order follows the assembly syntax: destination, then a, then b. */
    __ASM volatile(
        "rsub16 %0, %1, %2"
        : "=r"(halved)
        : "r"(a), "r"(b));

    return halved;
}
/* ===== Inline Function End for 3.90. RSUB16 ===== */

/* ===== Inline Function Start for 3.91. RSUB64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
 * \brief RSUB64 (64-bit Signed Halving Subtraction)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * RSUB64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Perform a 64-bit signed integer subtraction. The result is halved to avoid overflow or
 * saturation.
 *
 * **RV32 Description**:\n
 * This instruction subtracts the 64-bit signed integer of an even/odd pair of
 * registers specified by Rs2(4,1) from the 64-bit signed integer of an even/odd pair of registers
 * specified by Rs1(4,1). The subtraction result is first arithmetically right-shifted by 1 bit and then
 * written to an even/odd pair of registers specified by Rd(4,1).
 * Rx(4,1), i.e., value d, determines the even/odd pair group of two registers. Specifically, the register
 * pair includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
 * of the pair contains the low 32-bit of the result.
 *
 * **RV64 Description**:\n
 * This instruction subtracts the 64-bit signed integer in Rs2 from the 64-bit signed
 * integer in Rs1. The 64-bit subtraction result is first arithmetically right-shifted by 1 bit and then
 * written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * RV32:
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
 * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
 * R[t_H].R[t_L] = (R[a_H].R[a_L] - R[b_H].R[b_L]) s>> 1;
 * RV64:
 * Rd = (Rs1 - Rs2) s>> 1;
 * ~~~
 *
 * \param [in]  a    long long type of value stored in a
 * \param [in]  b    long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_RSUB64(long long a, long long b)
{
    long long halved;

    /* All three operands are 64-bit values handed to the instruction as register operands. */
    __ASM volatile(
        "rsub64 %0, %1, %2"
        : "=r"(halved)
        : "r"(a), "r"(b));

    return halved;
}
/* ===== Inline Function End for 3.91. RSUB64 ===== */

/* ===== Inline Function Start for 3.92. RSUBW ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
 * \brief RSUBW (32-bit Signed Halving Subtraction)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * RSUBW Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Subtract 32-bit signed integers and the result is halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the first 32-bit signed integer in Rs2 from the first 32-bit
 * signed integer in Rs1. The result is first arithmetically right-shifted by 1 bit and then sign-extended
 * and written to Rd.
 *
 * **Examples**:\n
 * ~~~
 * * Rs1 = 0x7FFFFFFF, Rs2 = 0x80000000, Rd = 0x7FFFFFFF
 * * Rs1 = 0x80000000, Rs2 = 0x7FFFFFFF, Rd = 0x80000000
 * * Rs1 = 0x80000000, Rs2 = 0x40000000, Rd = 0xA0000000
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * RV32:
 * Rd[31:0] = (Rs1[31:0] - Rs2[31:0]) s>> 1;
 * RV64:
 * resw[31:0] = (Rs1[31:0] - Rs2[31:0]) s>> 1;
 * Rd[63:0] = SE(resw[31:0]);
 * ~~~
 *
 * \param [in]  a    int type of value stored in a
 * \param [in]  b    int type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_RSUBW(int a, int b)
{
    long halved;

    /* Inputs are int-typed; the destination is read back as a full long. */
    __ASM volatile(
        "rsubw %0, %1, %2"
        : "=r"(halved)
        : "r"(a), "r"(b));

    return halved;
}
/* ===== Inline Function End for 3.92. RSUBW ===== */

/* ===== Inline Function Start for 3.93. SCLIP8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
 * \brief SCLIP8 (SIMD 8-bit Signed Clip Value)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SCLIP8 Rd, Rs1, imm3u[2:0]
 * ~~~
 *
 * **Purpose**:\n
 * Limit the 8-bit signed integer elements of a register into a signed range simultaneously.
 *
 * **Description**:\n
 * This instruction limits the 8-bit signed integer elements stored in Rs1 into a signed
 * integer range between 2^imm3u-1 and -2^imm3u, and writes the limited results to Rd. For example, if
 * imm3u is 3, the 8-bit input values should be saturated between 7 and -8. If saturation is performed,
 * set OV bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * src = Rs1.B[x];
 * if (src > (2^imm3u)-1) {
 *   src = (2^imm3u)-1;
 *   OV = 1;
 * } else if (src < -2^imm3u) {
 *   src = -2^imm3u;
 *   OV = 1;
 * }
 * Rd.B[x] = src
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
/*
 * Implemented as a statement-expression macro: (b) is bound with the "K"
 * immediate constraint, so it must be an integer constant expression.
 * (a) is evaluated exactly once, before the result local is declared, and the
 * locals carry a macro-specific prefix. With generic local names, a caller
 * argument spelled like one of them would bind to the macro's own
 * uninitialized local instead of the caller's variable.
 */
#define __RV_SCLIP8(a, b)    \
    ({    \
        unsigned long __rv_sclip8_a = (unsigned long)(a);    \
        unsigned long __rv_sclip8_rd;    \
        __ASM volatile("sclip8 %0, %1, %2" : "=r"(__rv_sclip8_rd) : "r"(__rv_sclip8_a), "K"(b));    \
        __rv_sclip8_rd;    \
    })
/* ===== Inline Function End for 3.93. SCLIP8 ===== */

/* ===== Inline Function Start for 3.94. SCLIP16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
 * \brief SCLIP16 (SIMD 16-bit Signed Clip Value)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SCLIP16 Rd, Rs1, imm4u[3:0]
 * ~~~
 *
 * **Purpose**:\n
 * Limit the 16-bit signed integer elements of a register into a signed range simultaneously.
 *
 * **Description**:\n
 * This instruction limits the 16-bit signed integer elements stored in Rs1 into a signed
 * integer range between 2^imm4u-1 and -2^imm4u, and writes the limited results to Rd. For example, if
 * imm4u is 3, the 16-bit input values should be saturated between 7 and -8. If saturation is performed,
 * set OV bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * src = Rs1.H[x];
 * if (src > (2^imm4u)-1) {
 *   src = (2^imm4u)-1;
 *   OV = 1;
 * } else if (src < -2^imm4u) {
 *   src = -2^imm4u;
 *   OV = 1;
 * }
 * Rd.H[x] = src
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
/*
 * Implemented as a statement-expression macro: (b) is bound with the "K"
 * immediate constraint, so it must be an integer constant expression.
 * (a) is evaluated exactly once, before the result local is declared, and the
 * locals carry a macro-specific prefix. With generic local names, a caller
 * argument spelled like one of them would bind to the macro's own
 * uninitialized local instead of the caller's variable.
 */
#define __RV_SCLIP16(a, b)    \
    ({    \
        unsigned long __rv_sclip16_a = (unsigned long)(a);    \
        unsigned long __rv_sclip16_rd;    \
        __ASM volatile("sclip16 %0, %1, %2" : "=r"(__rv_sclip16_rd) : "r"(__rv_sclip16_a), "K"(b));    \
        __rv_sclip16_rd;    \
    })
/* ===== Inline Function End for 3.94. SCLIP16 ===== */

/* ===== Inline Function Start for 3.95. SCLIP32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
 * \brief SCLIP32 (SIMD 32-bit Signed Clip Value)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * SCLIP32 Rd, Rs1, imm5u[4:0]
 * ~~~
 *
 * **Purpose**:\n
 * Limit the 32-bit signed integer elements of a register into a signed range simultaneously.
 *
 * **Description**:\n
 * This instruction limits the 32-bit signed integer elements stored in Rs1 into a signed
 * integer range between 2^imm5u-1 and -2^imm5u, and writes the limited results to Rd. For example, if
 * imm5u is 3, the 32-bit input values should be saturated between 7 and -8. If saturation is performed,
 * set OV bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * src = Rs1.W[x];
 * if (src > (2^imm5u)-1) {
 *   src = (2^imm5u)-1;
 *   OV = 1;
 * } else if (src < -2^imm5u) {
 *   src = -2^imm5u;
 *   OV = 1;
 * }
 * Rd.W[x] = src
 * for RV32: x=0,
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in long type
 */
/*
 * Implemented as a statement-expression macro: (b) is bound with the "K"
 * immediate constraint, so it must be an integer constant expression.
 * (a) is evaluated exactly once, before the result local is declared, and the
 * locals carry a macro-specific prefix. With generic local names, a caller
 * argument spelled like one of them would bind to the macro's own
 * uninitialized local instead of the caller's variable.
 */
#define __RV_SCLIP32(a, b)    \
    ({    \
        long __rv_sclip32_a = (long)(a);    \
        long __rv_sclip32_rd;    \
        __ASM volatile("sclip32 %0, %1, %2" : "=r"(__rv_sclip32_rd) : "r"(__rv_sclip32_a), "K"(b));    \
        __rv_sclip32_rd;    \
    })
/* ===== Inline Function End for 3.95. SCLIP32 ===== */

/* ===== Inline Function Start for 3.96. SCMPLE8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP
 * \brief SCMPLE8 (SIMD 8-bit Signed Compare Less Than & Equal)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SCMPLE8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit signed integer elements less than & equal comparisons simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 8-bit signed integer elements in Rs1 with the 8-bit
 * signed integer elements in Rs2 to see if the one in Rs1 is less than or equal to the one in Rs2. If it is
 * true, the result is 0xFF; otherwise, the result is 0x0. The element comparison results are written to
 * Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.B[x] = (Rs1.B[x] {le} Rs2.B[x])? 0xff : 0x0;
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SCMPLE8(unsigned long a, unsigned long b)
{
    unsigned long mask;

    /* a is the left-hand side of the comparison, b the right-hand side. */
    __ASM volatile(
        "scmple8 %0, %1, %2"
        : "=r"(mask)
        : "r"(a), "r"(b));

    return mask;
}
/* ===== Inline Function End for 3.96. SCMPLE8 ===== */

/* ===== Inline Function Start for 3.97. SCMPLE16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP
 * \brief SCMPLE16 (SIMD 16-bit Signed Compare Less Than & Equal)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SCMPLE16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer elements less than & equal comparisons simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 16-bit signed integer elements in Rs1 with the 16-bit
 * signed integer elements in Rs2 to see if the one in Rs1 is less than or equal to the one in Rs2. If it is
 * true, the result is 0xFFFF; otherwise, the result is 0x0. The element comparison results are written
 * to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = (Rs1.H[x] {le} Rs2.H[x])? 0xffff : 0x0;
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SCMPLE16(unsigned long a, unsigned long b)
{
    unsigned long mask;

    /* a is the left-hand side of the comparison, b the right-hand side. */
    __ASM volatile(
        "scmple16 %0, %1, %2"
        : "=r"(mask)
        : "r"(a), "r"(b));

    return mask;
}
/* ===== Inline Function End for 3.97. SCMPLE16 ===== */

/* ===== Inline Function Start for 3.98. SCMPLT8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP
 * \brief SCMPLT8 (SIMD 8-bit Signed Compare Less Than)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SCMPLT8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit signed integer elements less than comparisons simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 8-bit signed integer elements in Rs1 with the 8-bit
 * signed integer elements in Rs2 to see if the one in Rs1 is less than the one in Rs2. If it is true, the
 * result is 0xFF; otherwise, the result is 0x0. The element comparison results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.B[x] = (Rs1.B[x] < Rs2.B[x])? 0xff : 0x0;
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SCMPLT8(unsigned long a, unsigned long b)
{
    unsigned long mask;

    /* a is the left-hand side of the comparison, b the right-hand side. */
    __ASM volatile(
        "scmplt8 %0, %1, %2"
        : "=r"(mask)
        : "r"(a), "r"(b));

    return mask;
}
/* ===== Inline Function End for 3.98. SCMPLT8 ===== */

/* ===== Inline Function Start for 3.99. SCMPLT16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP
 * \brief SCMPLT16 (SIMD 16-bit Signed Compare Less Than)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SCMPLT16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer elements less than comparisons simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 16-bit signed integer elements in Rs1 with the 16-bit
 * signed integer elements in Rs2 to see if the one in Rs1 is less than the one in Rs2. If it is true, the
 * result is 0xFFFF; otherwise, the result is 0x0. The element comparison results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = (Rs1.H[x] < Rs2.H[x])? 0xffff : 0x0;
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SCMPLT16(unsigned long a, unsigned long b)
{
    unsigned long mask;

    /* a is the left-hand side of the comparison, b the right-hand side. */
    __ASM volatile(
        "scmplt16 %0, %1, %2"
        : "=r"(mask)
        : "r"(a), "r"(b));

    return mask;
}
/* ===== Inline Function End for 3.99. SCMPLT16 ===== */

/* ===== Inline Function Start for 3.100. SLL8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
 * \brief SLL8 (SIMD 8-bit Shift Left Logical)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SLL8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit elements logical left shift operations simultaneously. The shift amount is a
 * variable from a GPR.
 *
 * **Description**:\n
 * The 8-bit elements in Rs1 are left-shifted logically. And the results are written to Rd.
 * The vacated low-order bits are filled with zero and the shift amount is specified by the low-order 3 bits of
 * the value in the Rs2 register.
 *
 * **Operations**:\n
 * ~~~
 * sa = Rs2[2:0];
 * Rd.B[x] = Rs1.B[x] << sa;
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SLL8(unsigned long a, unsigned int b)
{
    unsigned long shifted;

    /* a holds the packed elements, b is the register that carries the shift amount. */
    __ASM volatile(
        "sll8 %0, %1, %2"
        : "=r"(shifted)
        : "r"(a), "r"(b));

    return shifted;
}
/* ===== Inline Function End for 3.100. SLL8 ===== */

/* ===== Inline Function Start for 3.101. SLLI8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
 * \brief SLLI8 (SIMD 8-bit Shift Left Logical Immediate)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SLLI8 Rd, Rs1, imm3u
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit elements logical left shift operations simultaneously. The shift amount is an
 * immediate value.
 *
 * **Description**:\n
 * The 8-bit elements in Rs1 are left-shifted logically. And the results are written to Rd.
 * The vacated low-order bits are filled with zero and the shift amount is specified by the imm3u constant.
 *
 * **Operations**:\n
 * ~~~
 * sa = imm3u[2:0];
 * Rd.B[x] = Rs1.B[x] << sa;
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
/*
 * Implemented as a statement-expression macro: (b) is bound with the "K"
 * immediate constraint, so it must be an integer constant expression.
 * (a) is evaluated exactly once, before the result local is declared, and the
 * locals carry a macro-specific prefix. With generic local names, a caller
 * argument spelled like one of them would bind to the macro's own
 * uninitialized local instead of the caller's variable.
 */
#define __RV_SLLI8(a, b)    \
    ({    \
        unsigned long __rv_slli8_a = (unsigned long)(a);    \
        unsigned long __rv_slli8_rd;    \
        __ASM volatile("slli8 %0, %1, %2" : "=r"(__rv_slli8_rd) : "r"(__rv_slli8_a), "K"(b));    \
        __rv_slli8_rd;    \
    })
/* ===== Inline Function End for 3.101. SLLI8 ===== */

/* ===== Inline Function Start for 3.102. SLL16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
 * \brief SLL16 (SIMD 16-bit Shift Left Logical)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SLL16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit elements logical left shift operations simultaneously. The shift amount is a
 * variable from a GPR.
 *
 * **Description**:\n
 * The 16-bit elements in Rs1 are left-shifted logically. And the results are written to Rd.
 * The vacated low-order bits are filled with zero and the shift amount is specified by the low-order 4 bits of
 * the value in the Rs2 register.
 *
 * **Operations**:\n
 * ~~~
 * sa = Rs2[3:0];
 * Rd.H[x] = Rs1.H[x] << sa;
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SLL16(unsigned long a, unsigned int b)
{
    unsigned long shifted;

    /* a holds the packed elements, b is the register that carries the shift amount. */
    __ASM volatile(
        "sll16 %0, %1, %2"
        : "=r"(shifted)
        : "r"(a), "r"(b));

    return shifted;
}
/* ===== Inline Function End for 3.102. SLL16 ===== */

/* ===== Inline Function Start for 3.103. SLLI16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
 * \brief SLLI16 (SIMD 16-bit Shift Left Logical Immediate)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SLLI16 Rd, Rs1, imm4[3:0]
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit element logical left shift operations simultaneously. The shift amount is an
 * immediate value.
 *
 * **Description**:\n
 * The 16-bit elements in Rs1 are left-shifted logically. The vacated low-order bits are filled with
 * zero and the shift amount is specified by the imm4[3:0] constant. And the results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = imm4[3:0];
 * Rd.H[x] = Rs1.H[x] << sa;
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
/*
 * Implemented as a statement-expression macro: (b) is bound with the "K"
 * immediate constraint, so it must be an integer constant expression.
 * (a) is evaluated exactly once, before the result local is declared, and the
 * locals carry a macro-specific prefix. With generic local names, a caller
 * argument spelled like one of them would bind to the macro's own
 * uninitialized local instead of the caller's variable.
 */
#define __RV_SLLI16(a, b)    \
    ({    \
        unsigned long __rv_slli16_a = (unsigned long)(a);    \
        unsigned long __rv_slli16_rd;    \
        __ASM volatile("slli16 %0, %1, %2" : "=r"(__rv_slli16_rd) : "r"(__rv_slli16_a), "K"(b));    \
        __rv_slli16_rd;    \
    })
/* ===== Inline Function End for 3.103. SLLI16 ===== */

/* ===== Inline Function Start for 3.104. SMAL ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
 * \brief SMAL (Signed Multiply Halfs & Add 64-bit)
 * \details
 * **Type**: Partial-SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMAL Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed bottom 16-bit content of the 32-bit elements of a register with the top
 * 16-bit content of the same 32-bit elements of the same register, and add the results with a 64-bit
 * value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is written back
 * to another even/odd pair of registers (RV32) or a register (RV64).
 *
 * **RV32 Description**:\n
 * This instruction multiplies the bottom 16-bit content of the lower 32-bit of Rs2 with the top 16-bit
 * content of the lower 32-bit of Rs2 and adds the result with the 64-bit value of an even/odd pair of
 * registers specified by Rs1(4,1). The 64-bit addition result is written back to an even/odd pair of
 * registers specified by Rd(4,1). The 16-bit values of Rs2, and the 64-bit value of the Rs1(4,1) register-
 * pair are treated as signed integers.
 * Rx(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
 * register of the pair contains the low 32-bit of the operand.
 *
 * **RV64 Description**:\n
 * This instruction multiplies the bottom 16-bit content of the 32-bit elements of Rs2 with the top 16-bit
 * content of the same 32-bit elements of Rs2 and adds the results with the 64-bit value of Rs1. The 64-
 * bit addition result is written back to Rd. The 16-bit values of Rs2, and the 64-bit value of Rs1 are
 * treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * RV32:
 * Mres[31:0] = Rs2.H[1] * Rs2.H[0];
 * Idx0 = CONCAT(Rs1(4,1),1'b0); Idx1 = CONCAT(Rs1(4,1),1'b1);
 * Idx2 = CONCAT(Rd(4,1),1'b0); Idx3 = CONCAT(Rd(4,1),1'b1);
 * R[Idx3].R[Idx2] = R[Idx1].R[Idx0] + SE64(Mres[31:0]);
 * RV64:
 * Mres[0][31:0] = Rs2.W[0].H[1] * Rs2.W[0].H[0];
 * Mres[1][31:0] = Rs2.W[1].H[1] * Rs2.W[1].H[0];
 * Rd = Rs1 + SE64(Mres[1][31:0]) + SE64(Mres[0][31:0]);
 * ~~~
 *
 * \param [in]  a    long long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_SMAL(long long a, unsigned long b)
{
    long long sum;

    /* a is the 64-bit addend; b supplies the 16-bit halves that get multiplied. */
    __ASM volatile(
        "smal %0, %1, %2"
        : "=r"(sum)
        : "r"(a), "r"(b));

    return sum;
}
/* ===== Inline Function End for 3.104. SMAL ===== */

/* ===== Inline Function Start for 3.105.1. SMALBB ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
 * \brief SMALBB (Signed Multiply Bottom Halfs & Add 64-bit)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * SMALBB Rd, Rs1, Rs2
 * SMALBT Rd, Rs1, Rs2
 * SMALTT Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 16-bit content of the 32-bit elements of a register with the 16-bit
 * content of the corresponding 32-bit elements of another register and add the results with a 64-bit
 * value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is written back
 * to the register-pair (RV32) or the register (RV64).
 * * SMALBB: rt pair + bottom*bottom (all 32-bit elements)
 * * SMALBT: rt pair + bottom*top (all 32-bit elements)
 * * SMALTT: rt pair + top*top (all 32-bit elements)
 *
 * **RV32 Description**:\n
 * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
 * content of Rs2.
 * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit
 * content of Rs2.
 * For the `SMALTT` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content
 * of Rs2.
 * The multiplication result is added with the 64-bit value of an even/odd pair of registers specified by
 * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and
 * Rs2, and the 64-bit value of the register-pair are treated as signed integers.
 * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
 * register of the pair contains the low 32-bit of the operand.
 *
 * **RV64 Description**:\n
 * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2.
 * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the top 16-bit content of the 32-bit elements of Rs2.
 * For the `SMALTT` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
 * the top 16-bit content of the 32-bit elements of Rs2.
 * The multiplication results are added with the 64-bit value of Rd. The 64-bit addition result is written
 * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed
 * integers.
 *
 * **Operations**:\n
 * ~~~
 * RV32:
 * Mres[31:0] = Rs1.H[0] * Rs2.H[0]; // SMALBB
 * Mres[31:0] = Rs1.H[0] * Rs2.H[1]; // SMALBT
 * Mres[31:0] = Rs1.H[1] * Rs2.H[1]; // SMALTT
 * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
 * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]);
 * RV64:
 * // SMALBB
 * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[0];
 * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[0];
 * // SMALBT
 * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[1];
 * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[1];
 * // SMALTT
 * Mres[0][31:0] = Rs1.W[0].H[1] * Rs2.W[0].H[1];
 * Mres[1][31:0] = Rs1.W[1].H[1] * Rs2.W[1].H[1];
 * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
 * ~~~
 *
 * \param [in]  t    long long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_SMALBB(long long t, unsigned long a, unsigned long b)
{
    long long acc = t;

    /* "+r": the accumulator is both read and written by the instruction. */
    __ASM volatile(
        "smalbb %0, %1, %2"
        : "+r"(acc)
        : "r"(a), "r"(b));

    return acc;
}
/* ===== Inline Function End for 3.105.1. SMALBB ===== */

/* ===== Inline Function Start for 3.105.2. SMALBT ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
 * \brief SMALBT (Signed Multiply Bottom Half & Top Half & Add 64-bit)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * SMALBB Rd, Rs1, Rs2
 * SMALBT Rd, Rs1, Rs2
 * SMALTT Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 16-bit content of the 32-bit elements of a register with the 16-bit
 * content of the corresponding 32-bit elements of another register and add the results with a 64-bit
 * value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is written back
 * to the register-pair (RV32) or the register (RV64).
 * * SMALBB: rt pair + bottom*bottom (all 32-bit elements)
 * * SMALBT: rt pair + bottom*top (all 32-bit elements)
 * * SMALTT: rt pair + top*top (all 32-bit elements)
 *
 * **RV32 Description**:\n
 * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
 * content of Rs2.
 * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit
 * content of Rs2.
 * For the `SMALTT` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content
 * of Rs2.
 * The multiplication result is added with the 64-bit value of an even/odd pair of registers specified by
 * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and
 * Rs2, and the 64-bit value of the register-pair are treated as signed integers.
 * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
 * register of the pair contains the low 32-bit of the operand.
 *
 * **RV64 Description**:\n
 * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2.
 * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the top 16-bit content of the 32-bit elements of Rs2.
 * For the `SMALTT` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
 * the top 16-bit content of the 32-bit elements of Rs2.
 * The multiplication results are added with the 64-bit value of Rd. The 64-bit addition result is written
 * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed
 * integers.
 *
 * **Operations**:\n
 * ~~~
 * RV32:
 * Mres[31:0] = Rs1.H[0] * Rs2.H[0]; // SMALBB
 * Mres[31:0] = Rs1.H[0] * Rs2.H[1]; // SMALBT
 * Mres[31:0] = Rs1.H[1] * Rs2.H[1]; // SMALTT
 * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
 * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]);
 * RV64:
 * // SMALBB
 * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[0];
 * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[0];
 * // SMALBT
 * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[1];
 * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[1];
 * // SMALTT
 * Mres[0][31:0] = Rs1.W[0].H[1] * Rs2.W[0].H[1];
 * Mres[1][31:0] = Rs1.W[1].H[1] * Rs2.W[1].H[1];
 * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
 * ~~~
 *
 * \param [in]  t    long long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_SMALBT(long long t, unsigned long a, unsigned long b)
{
    long long acc = t;

    /* "+r": the accumulator is both read and written by the instruction. */
    __ASM volatile(
        "smalbt %0, %1, %2"
        : "+r"(acc)
        : "r"(a), "r"(b));

    return acc;
}
/* ===== Inline Function End for 3.105.2. SMALBT ===== */

/* ===== Inline Function Start for 3.105.3. SMALTT ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
 * \brief SMALTT (Signed Multiply Top Halfs & Add 64-bit)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * SMALBB Rd, Rs1, Rs2
 * SMALBT Rd, Rs1, Rs2
 * SMALTT Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 16-bit content of the 32-bit elements of a register with the 16-bit
 * content of the corresponding 32-bit elements of another register and add the results with a 64-bit
 * value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is written back
 * to the register-pair (RV32) or the register (RV64).
 * * SMALBB: rt pair + bottom*bottom (all 32-bit elements)
 * * SMALBT: rt pair + bottom*top (all 32-bit elements)
 * * SMALTT: rt pair + top*top (all 32-bit elements)
 *
 * **RV32 Description**:\n
 * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
 * content of Rs2.
 * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit
 * content of Rs2.
 * For the `SMALTT` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content
 * of Rs2.
 * The multiplication result is added with the 64-bit value of an even/odd pair of registers specified by
 * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and
 * Rs2, and the 64-bit value of the register-pair are treated as signed integers.
 * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
 * register of the pair contains the low 32-bit of the operand.
 *
 * **RV64 Description**:\n
 * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2.
 * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the top 16-bit content of the 32-bit elements of Rs2.
 * For the `SMALTT` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
 * the top 16-bit content of the 32-bit elements of Rs2.
 * The multiplication results are added with the 64-bit value of Rd. The 64-bit addition result is written
 * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed
 * integers.
 *
 * **Operations**:\n
 * ~~~
 * RV32:
 * Mres[31:0] = Rs1.H[0] * Rs2.H[0]; // SMALBB
 * Mres[31:0] = Rs1.H[0] * Rs2.H[1]; // SMALBT
 * Mres[31:0] = Rs1.H[1] * Rs2.H[1]; // SMALTT
 * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
 * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]);
 * RV64:
 * // SMALBB
 * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[0];
 * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[0];
 * // SMALBT
 * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[1];
 * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[1];
 * // SMALTT
 * Mres[0][31:0] = Rs1.W[0].H[1] * Rs2.W[0].H[1];
 * Mres[1][31:0] = Rs1.W[1].H[1] * Rs2.W[1].H[1];
 * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
 * ~~~
 *
 * \param [in]  t    long long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_SMALTT(long long t, unsigned long a, unsigned long b)
{
    /* The 64-bit accumulator is a read-write ("+r") operand: it enters the
       instruction holding t and leaves holding the updated sum. */
    long long acc = t;

    __ASM volatile("smaltt %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.105.3. SMALTT ===== */

/* ===== Inline Function Start for 3.106.1. SMALDA ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
 * \brief SMALDA (Signed Multiply Two Halfs and Two Adds 64-bit)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * SMALDA Rd, Rs1, Rs2
 * SMALXDA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
 * adds the two 32-bit results and the 64-bit value of an even/odd pair of registers together.
 * * SMALDA: rt pair+ top*top + bottom*bottom (all 32-bit elements)
 * * SMALXDA: rt pair+ top*bottom + bottom*top (all 32-bit elements)
 *
 * **RV32 Description**:\n
 * For the `SMALDA` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
 * content of Rs2 and then adds the result to the result of multiplying the top 16-bit content of Rs1 with
 * the top 16-bit content of Rs2 with unlimited precision.
 * For the `SMALXDA` instruction, it multiplies the top 16-bit content of Rs1 with the bottom 16-bit
 * content of Rs2 and then adds the result to the result of multiplying the bottom 16-bit content of Rs1
 * with the top 16-bit content of Rs2 with unlimited precision.
 * The result is added to the 64-bit value of an even/odd pair of registers specified by Rd(4,1). The 64-
 * bit addition result is written back to the register-pair. The 16-bit values of Rs1 and Rs2, and the 64-
 * bit value of the register-pair are treated as signed integers.
 * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
 * register of the pair contains the low 32-bit of the operand.
 *
 * **RV64 Description**:\n
 * For the `SMALDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
 * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-
 * bit elements of Rs2 with unlimited precision.
 * For the `SMALXDA` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
 * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the
 * 32-bit elements of Rs2 with unlimited precision.
 * The results are added to the 64-bit value of Rd. The 64-bit addition result is written back to Rd. The
 * 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * RV32:
 * // SMALDA
 * Mres0[31:0] = (Rs1.H[0] * Rs2.H[0]);
 * Mres1[31:0] = (Rs1.H[1] * Rs2.H[1]);
 * // SMALXDA
 * Mres0[31:0] = (Rs1.H[0] * Rs2.H[1]);
 * Mres1[31:0] = (Rs1.H[1] * Rs2.H[0]);
 * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
 * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres0[31:0]) + SE64(Mres1[31:0]);
 * RV64:
 * // SMALDA
 * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
 * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
 * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
 * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
 * // SMALXDA
 * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
 * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]);
 * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
 * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]);
 * Rd = Rd + SE64(Mres0[0][31:0]) + SE64(Mres1[0][31:0]) + SE64(Mres0[1][31:0]) +
 * SE64(Mres1[1][31:0]);
 * ~~~
 *
 * \param [in]  t    long long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_SMALDA(long long t, unsigned long a, unsigned long b)
{
    /* acc is both source and destination ("+r"): t goes in, t plus the
       products computed from a and b comes out. */
    long long acc = t;

    __ASM volatile("smalda %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.106.1. SMALDA ===== */

/* ===== Inline Function Start for 3.106.2. SMALXDA ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
 * \brief SMALXDA (Signed Crossed Multiply Two Halfs and Two Adds 64-bit)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * SMALDA Rd, Rs1, Rs2
 * SMALXDA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
 * adds the two 32-bit results and the 64-bit value of an even/odd pair of registers together.
 * * SMALDA: rt pair+ top*top + bottom*bottom (all 32-bit elements)
 * * SMALXDA: rt pair+ top*bottom + bottom*top (all 32-bit elements)
 *
 * **RV32 Description**:\n
 * For the `SMALDA` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
 * content of Rs2 and then adds the result to the result of multiplying the top 16-bit content of Rs1 with
 * the top 16-bit content of Rs2 with unlimited precision.
 * For the `SMALXDA` instruction, it multiplies the top 16-bit content of Rs1 with the bottom 16-bit
 * content of Rs2 and then adds the result to the result of multiplying the bottom 16-bit content of Rs1
 * with the top 16-bit content of Rs2 with unlimited precision.
 * The result is added to the 64-bit value of an even/odd pair of registers specified by Rd(4,1). The 64-
 * bit addition result is written back to the register-pair. The 16-bit values of Rs1 and Rs2, and the 64-
 * bit value of the register-pair are treated as signed integers.
 * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
 * register of the pair contains the low 32-bit of the operand.
 *
 * **RV64 Description**:\n
 * For the `SMALDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
 * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-
 * bit elements of Rs2 with unlimited precision.
 * For the `SMALXDA` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
 * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the
 * 32-bit elements of Rs2 with unlimited precision.
 * The results are added to the 64-bit value of Rd. The 64-bit addition result is written back to Rd. The
 * 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * RV32:
 * // SMALDA
 * Mres0[31:0] = (Rs1.H[0] * Rs2.H[0]);
 * Mres1[31:0] = (Rs1.H[1] * Rs2.H[1]);
 * // SMALXDA
 * Mres0[31:0] = (Rs1.H[0] * Rs2.H[1]);
 * Mres1[31:0] = (Rs1.H[1] * Rs2.H[0]);
 * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
 * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres0[31:0]) + SE64(Mres1[31:0]);
 * RV64:
 * // SMALDA
 * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
 * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
 * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
 * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
 * // SMALXDA
 * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
 * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]);
 * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
 * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]);
 * Rd = Rd + SE64(Mres0[0][31:0]) + SE64(Mres1[0][31:0]) + SE64(Mres0[1][31:0]) +
 * SE64(Mres1[1][31:0]);
 * ~~~
 *
 * \param [in]  t    long long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_SMALXDA(long long t, unsigned long a, unsigned long b)
{
    /* acc is both source and destination ("+r"): t goes in, t plus the
       crossed products computed from a and b comes out. */
    long long acc = t;

    __ASM volatile("smalxda %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.106.2. SMALXDA ===== */

/* ===== Inline Function Start for 3.107.1. SMALDS ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
 * \brief SMALDS (Signed Multiply Two Halfs & Subtract & Add 64-bit)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * SMALDS Rd, Rs1, Rs2
 * SMALDRS Rd, Rs1, Rs2
 * SMALXDS Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
 * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to
 * the 64-bit value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is
 * written back to the register-pair.
 * * SMALDS: rt pair + (top*top - bottom*bottom) (all 32-bit elements)
 * * SMALDRS: rt pair + (bottom*bottom - top*top) (all 32-bit elements)
 * * SMALXDS: rt pair + (top*bottom - bottom*top) (all 32-bit elements)
 *
 * **RV32 Description**:\n
 * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
 * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
 * Rs1 with the top 16-bit content of Rs2.
 * For the `SMALDRS` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content
 * of Rs2 and then subtracts the result from the result of multiplying the bottom 16-bit content of Rs1
 * with the bottom 16-bit content of Rs2.
 * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit
 * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
 * Rs1 with the bottom 16-bit content of Rs2.
 * The subtraction result is then added to the 64-bit value of an even/odd pair of registers specified by
 * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and
 * Rs2, and the 64-bit value of the register-pair are treated as signed integers.
 * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
 * register of the pair contains the low 32-bit of the operand.
 *
 * **RV64 Description**:\n
 * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
 * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content
 * of the 32-bit elements of Rs2.
 * For the `SMALDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
 * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of
 * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of
 * the 32-bit elements of Rs2.
 * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
 * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit
 * content of the 32-bit elements of Rs2.
 * The subtraction results are then added to the 64-bit value of Rd. The 64-bit addition result is written
 * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed
 * integers.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * Mres[31:0] = (Rs1.H[1] * Rs2.H[1]) - (Rs1.H[0] * Rs2.H[0]); // SMALDS
 * Mres[31:0] = (Rs1.H[0] * Rs2.H[0]) - (Rs1.H[1] * Rs2.H[1]); // SMALDRS
 * Mres[31:0] = (Rs1.H[1] * Rs2.H[0]) - (Rs1.H[0] * Rs2.H[1]); // SMALXDS
 * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
 * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]);
 * * RV64:
 * // SMALDS
 * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]) - (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
 * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]) - (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
 * // SMALDRS
 * Mres[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]) - (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
 * Mres[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]) - (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
 * // SMALXDS
 * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]) - (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
 * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]) - (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
 * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
 * ~~~
 *
 * \param [in]  t    long long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_SMALDS(long long t, unsigned long a, unsigned long b)
{
    /* Read-write ("+r") accumulator: starts as t and is updated in place
       by the instruction using a and b. */
    long long acc = t;

    __ASM volatile("smalds %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.107.1. SMALDS ===== */

/* ===== Inline Function Start for 3.107.2. SMALDRS ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
 * \brief SMALDRS (Signed Multiply Two Halfs & Reverse Subtract & Add 64-bit)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * SMALDS Rd, Rs1, Rs2
 * SMALDRS Rd, Rs1, Rs2
 * SMALXDS Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
 * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to
 * the 64-bit value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is
 * written back to the register-pair.
 * * SMALDS: rt pair + (top*top - bottom*bottom) (all 32-bit elements)
 * * SMALDRS: rt pair + (bottom*bottom - top*top) (all 32-bit elements)
 * * SMALXDS: rt pair + (top*bottom - bottom*top) (all 32-bit elements)
 *
 * **RV32 Description**:\n
 * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
 * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
 * Rs1 with the top 16-bit content of Rs2.
 * For the `SMALDRS` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content
 * of Rs2 and then subtracts the result from the result of multiplying the bottom 16-bit content of Rs1
 * with the bottom 16-bit content of Rs2.
 * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit
 * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
 * Rs1 with the bottom 16-bit content of Rs2.
 * The subtraction result is then added to the 64-bit value of an even/odd pair of registers specified by
 * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and
 * Rs2, and the 64-bit value of the register-pair are treated as signed integers.
 * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
 * register of the pair contains the low 32-bit of the operand.
 *
 * **RV64 Description**:\n
 * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
 * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content
 * of the 32-bit elements of Rs2.
 * For the `SMALDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
 * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of
 * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of
 * the 32-bit elements of Rs2.
 * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
 * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit
 * content of the 32-bit elements of Rs2.
 * The subtraction results are then added to the 64-bit value of Rd. The 64-bit addition result is written
 * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed
 * integers.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * Mres[31:0] = (Rs1.H[1] * Rs2.H[1]) - (Rs1.H[0] * Rs2.H[0]); // SMALDS
 * Mres[31:0] = (Rs1.H[0] * Rs2.H[0]) - (Rs1.H[1] * Rs2.H[1]); // SMALDRS
 * Mres[31:0] = (Rs1.H[1] * Rs2.H[0]) - (Rs1.H[0] * Rs2.H[1]); // SMALXDS
 * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
 * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]);
 * * RV64:
 * // SMALDS
 * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]) - (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
 * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]) - (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
 * // SMALDRS
 * Mres[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]) - (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
 * Mres[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]) - (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
 * // SMALXDS
 * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]) - (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
 * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]) - (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
 * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
 * ~~~
 *
 * \param [in]  t    long long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_SMALDRS(long long t, unsigned long a, unsigned long b)
{
    /* Read-write ("+r") accumulator: starts as t and is updated in place
       by the instruction using a and b. */
    long long acc = t;

    __ASM volatile("smaldrs %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.107.2. SMALDRS ===== */

/* ===== Inline Function Start for 3.107.3. SMALXDS ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
 * \brief SMALXDS (Signed Crossed Multiply Two Halfs & Subtract & Add 64-bit)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * SMALDS Rd, Rs1, Rs2
 * SMALDRS Rd, Rs1, Rs2
 * SMALXDS Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
 * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to
 * the 64-bit value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is
 * written back to the register-pair.
 * * SMALDS: rt pair + (top*top - bottom*bottom) (all 32-bit elements)
 * * SMALDRS: rt pair + (bottom*bottom - top*top) (all 32-bit elements)
 * * SMALXDS: rt pair + (top*bottom - bottom*top) (all 32-bit elements)
 *
 * **RV32 Description**:\n
 * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
 * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
 * Rs1 with the top 16-bit content of Rs2.
 * For the `SMALDRS` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content
 * of Rs2 and then subtracts the result from the result of multiplying the bottom 16-bit content of Rs1
 * with the bottom 16-bit content of Rs2.
 * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit
 * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
 * Rs1 with the bottom 16-bit content of Rs2.
 * The subtraction result is then added to the 64-bit value of an even/odd pair of registers specified by
 * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and
 * Rs2, and the 64-bit value of the register-pair are treated as signed integers.
 * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
 * register of the pair contains the low 32-bit of the operand.
 *
 * **RV64 Description**:\n
 * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
 * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content
 * of the 32-bit elements of Rs2.
 * For the `SMALDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
 * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of
 * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of
 * the 32-bit elements of Rs2.
 * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
 * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit
 * content of the 32-bit elements of Rs2.
 * The subtraction results are then added to the 64-bit value of Rd. The 64-bit addition result is written
 * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed
 * integers.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * Mres[31:0] = (Rs1.H[1] * Rs2.H[1]) - (Rs1.H[0] * Rs2.H[0]); // SMALDS
 * Mres[31:0] = (Rs1.H[0] * Rs2.H[0]) - (Rs1.H[1] * Rs2.H[1]); // SMALDRS
 * Mres[31:0] = (Rs1.H[1] * Rs2.H[0]) - (Rs1.H[0] * Rs2.H[1]); // SMALXDS
 * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
 * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]);
 * * RV64:
 * // SMALDS
 * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]) - (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
 * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]) - (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
 * // SMALDRS
 * Mres[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]) - (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
 * Mres[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]) - (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
 * // SMALXDS
 * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]) - (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
 * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]) - (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
 * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
 * ~~~
 *
 * \param [in]  t    long long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_SMALXDS(long long t, unsigned long a, unsigned long b)
{
    /* Read-write ("+r") accumulator: starts as t and is updated in place
       by the instruction using a and b. */
    long long acc = t;

    __ASM volatile("smalxds %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.107.3. SMALXDS ===== */

/* ===== Inline Function Start for 3.108. SMAR64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
 * \brief SMAR64 (Signed Multiply and Add to 64-Bit Data)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * SMAR64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the 32-bit signed elements in two registers and add the 64-bit multiplication
 * result to the 64-bit signed data of a pair of registers (RV32) or a register (RV64). The result is written
 * back to the pair of registers (RV32) or a register (RV64).
 *
 * **RV32 Description**:\n
 * This instruction multiplies the 32-bit signed data of Rs1 with that of Rs2. It adds
 * the 64-bit multiplication result to the 64-bit signed data of an even/odd pair of registers specified by
 * Rd(4,1). The addition result is written back to the even/odd pair of registers specified by Rd(4,1).
 * Rd(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
 * of the pair contains the low 32-bit of the result.
 *
 * **RV64 Description**:\n
 * This instruction multiplies the 32-bit signed elements of Rs1 with that of Rs2. It
 * adds the 64-bit multiplication results to the 64-bit signed data of Rd. The addition result is written
 * back to Rd.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * R[t_H].R[t_L] = R[t_H].R[t_L] + (Rs1 * Rs2);
 * * RV64:
 * Rd = Rd + (Rs1.W[0] * Rs2.W[0]) + (Rs1.W[1] * Rs2.W[1]);
 * ~~~
 *
 * \param [in]  t    long long type of value stored in t
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_SMAR64(long long t, long a, long b)
{
    /* 64-bit accumulator, passed as a read-write ("+r") operand so the
       instruction both consumes t and produces the returned value. */
    long long acc = t;

    __ASM volatile("smar64 %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.108. SMAR64 ===== */

/* ===== Inline Function Start for 3.109. SMAQA ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_8B_MULT_32B_ADD
 * \brief SMAQA (Signed Multiply Four Bytes with 32-bit Adds)
 * \details
 * **Type**: Partial-SIMD (Reduction)
 *
 * **Syntax**:\n
 * ~~~
 * SMAQA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do four signed 8-bit multiplications from 32-bit chunks of two registers; and then adds
 * the four 16-bit results and the content of corresponding 32-bit chunks of a third register together.
 *
 * **Description**:\n
 * This instruction multiplies the four signed 8-bit elements of 32-bit chunks of Rs1 with the four
 * signed 8-bit elements of 32-bit chunks of Rs2 and then adds the four results together with the signed
 * content of the corresponding 32-bit chunks of Rd. The final results are written back to the
 * corresponding 32-bit chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rd.W[x] +
 *    (Rs1.W[x].B[3] s* Rs2.W[x].B[3]) + (Rs1.W[x].B[2] s* Rs2.W[x].B[2]) +
 *    (Rs1.W[x].B[1] s* Rs2.W[x].B[1]) + (Rs1.W[x].B[0] s* Rs2.W[x].B[0]);
 * Rd.W[x] = res[x];
 * for RV32: x=0,
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_SMAQA(long t, unsigned long a, unsigned long b)
{
    /* Accumulator register: read and written by the instruction ("+r"),
       so it is seeded with t and returned after the update. */
    long acc = t;

    __ASM volatile("smaqa %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.109. SMAQA ===== */

/* ===== Inline Function Start for 3.110. SMAQA.SU ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_8B_MULT_32B_ADD
 * \brief SMAQA.SU (Signed and Unsigned Multiply Four Bytes with 32-bit Adds)
 * \details
 * **Type**: Partial-SIMD (Reduction)
 *
 * **Syntax**:\n
 * ~~~
 * SMAQA.SU Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do four `signed x unsigned` 8-bit multiplications from 32-bit chunks of two registers; and
 * then adds the four 16-bit results and the content of corresponding 32-bit chunks of a third register
 * together.
 *
 * **Description**:\n
 * This instruction multiplies the four signed 8-bit elements of 32-bit chunks of Rs1 with the four
 * unsigned 8-bit elements of 32-bit chunks of Rs2 and then adds the four results together with the
 * signed content of the corresponding 32-bit chunks of Rd. The final results are written back to the
 * corresponding 32-bit chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rd.W[x] +
 *    (Rs1.W[x].B[3] su* Rs2.W[x].B[3]) + (Rs1.W[x].B[2] su* Rs2.W[x].B[2]) +
 *    (Rs1.W[x].B[1] su* Rs2.W[x].B[1]) + (Rs1.W[x].B[0] su* Rs2.W[x].B[0]);
 * Rd.W[x] = res[x];
 * for RV32: x=0,
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_SMAQA_SU(long t, unsigned long a, unsigned long b)
{
    /* Accumulator register: read and written by the instruction ("+r"),
       so it is seeded with t and returned after the update. */
    long acc = t;

    __ASM volatile("smaqa.su %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 3.110. SMAQA.SU ===== */

/* ===== Inline Function Start for 3.111. SMAX8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
 * \brief SMAX8 (SIMD 8-bit Signed Maximum)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMAX8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit signed integer elements finding maximum operations simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 8-bit signed integer elements in Rs1 with the 8-bit
 * signed integer elements in Rs2 and, for each pair, selects the element that is greater.
 * The selected results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.B[x] = (Rs1.B[x] > Rs2.B[x])? Rs1.B[x] : Rs2.B[x];
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SMAX8(unsigned long a, unsigned long b)
{
    unsigned long rd; /* output-only ("=r") operand, filled in by the instruction */

    __ASM volatile("smax8 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.111. SMAX8 ===== */

/* ===== Inline Function Start for 3.112. SMAX16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
 * \brief SMAX16 (SIMD 16-bit Signed Maximum)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMAX16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer elements finding maximum operations simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 16-bit signed integer elements in Rs1 with the 16-bit
 * signed integer elements in Rs2 and, for each pair, selects the element that is greater.
 * The selected results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = (Rs1.H[x] > Rs2.H[x])? Rs1.H[x] : Rs2.H[x];
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SMAX16(unsigned long a, unsigned long b)
{
    unsigned long rd; /* output-only ("=r") operand, filled in by the instruction */

    __ASM volatile("smax16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.112. SMAX16 ===== */

/* ===== Inline Function Start for 3.113.1. SMBB16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
 * \brief SMBB16 (SIMD Signed Multiply Bottom Half & Bottom Half)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMBB16 Rd, Rs1, Rs2
 * SMBT16 Rd, Rs1, Rs2
 * SMTT16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 16-bit content of the 32-bit elements of a register with the signed 16-
 * bit content of the 32-bit elements of another register and write the result to a third register.
 * * SMBB16: W[x].bottom*W[x].bottom
 * * SMBT16: W[x].bottom *W[x].top
 * * SMTT16: W[x].top * W[x].top
 *
 * **Description**:\n
 * For the `SMBB16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2.
 * For the `SMBT16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the top 16-bit content of the 32-bit elements of Rs2.
 * For the `SMTT16` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
 * the top 16-bit content of the 32-bit elements of Rs2.
 * The multiplication results are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed
 * integers.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[0]; // SMBB16
 * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[1]; // SMBT16
 * Rd.W[x] = Rs1.W[x].H[1] * Rs2.W[x].H[1]; // SMTT16
 * for RV32: x=0,
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_SMBB16(unsigned long a, unsigned long b)
{
    /* Per 32-bit element: signed bottom half of a times signed bottom half of b. */
    long rd;

    __ASM volatile(
        "smbb16 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.113.1. SMBB16 ===== */

/* ===== Inline Function Start for 3.113.2. SMBT16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
 * \brief SMBT16 (SIMD Signed Multiply Bottom Half & Top Half)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMBB16 Rd, Rs1, Rs2
 * SMBT16 Rd, Rs1, Rs2
 * SMTT16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 16-bit content of the 32-bit elements of a register with the signed
 * 16-bit content of the 32-bit elements of another register and write the result to a third register.
 * * SMBB16: W[x].bottom*W[x].bottom
 * * SMBT16: W[x].bottom *W[x].top
 * * SMTT16: W[x].top * W[x].top
 *
 * **Description**:\n
 * For the `SMBB16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2.
 * For the `SMBT16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the top 16-bit content of the 32-bit elements of Rs2.
 * For the `SMTT16` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
 * the top 16-bit content of the 32-bit elements of Rs2.
 * The multiplication results are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed
 * integers.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[0]; // SMBB16
 * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[1]; // SMBT16
 * Rd.W[x] = Rs1.W[x].H[1] * Rs2.W[x].H[1]; // SMTT16
 * for RV32: x=0,
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_SMBT16(unsigned long a, unsigned long b)
{
    /* Per 32-bit element: signed bottom half of a times signed top half of b. */
    long rd;

    __ASM volatile(
        "smbt16 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.113.2. SMBT16 ===== */

/* ===== Inline Function Start for 3.113.3. SMTT16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
 * \brief SMTT16 (SIMD Signed Multiply Top Half & Top Half)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMBB16 Rd, Rs1, Rs2
 * SMBT16 Rd, Rs1, Rs2
 * SMTT16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 16-bit content of the 32-bit elements of a register with the signed
 * 16-bit content of the 32-bit elements of another register and write the result to a third register.
 * * SMBB16: W[x].bottom*W[x].bottom
 * * SMBT16: W[x].bottom *W[x].top
 * * SMTT16: W[x].top * W[x].top
 *
 * **Description**:\n
 * For the `SMBB16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2.
 * For the `SMBT16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the top 16-bit content of the 32-bit elements of Rs2.
 * For the `SMTT16` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
 * the top 16-bit content of the 32-bit elements of Rs2.
 * The multiplication results are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed
 * integers.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[0]; // SMBB16
 * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[1]; // SMBT16
 * Rd.W[x] = Rs1.W[x].H[1] * Rs2.W[x].H[1]; // SMTT16
 * for RV32: x=0,
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_SMTT16(unsigned long a, unsigned long b)
{
    /* Per 32-bit element: signed top half of a times signed top half of b. */
    long rd;

    __ASM volatile(
        "smtt16 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.113.3. SMTT16 ===== */

/* ===== Inline Function Start for 3.114.1. SMDS ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
 * \brief SMDS (SIMD Signed Multiply Two Halfs and Subtract)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMDS Rd, Rs1, Rs2
 * SMDRS Rd, Rs1, Rs2
 * SMXDS Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
 * perform a subtraction operation between the two 32-bit results.
 * * SMDS: top*top - bottom*bottom (per 32-bit element)
 * * SMDRS: bottom*bottom - top*top (per 32-bit element)
 * * SMXDS: top*bottom - bottom*top (per 32-bit element)
 *
 * **Description**:\n
 * For the `SMDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with
 * the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result
 * of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the
 * 32-bit elements of Rs2.
 * For the `SMDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
 * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of
 * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of
 * the 32-bit elements of Rs2.
 * For the `SMXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
 * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit
 * content of the 32-bit elements of Rs2.
 * The subtraction result is written to the corresponding 32-bit element of Rd. The 16-bit contents of
 * multiplication are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * * SMDS:
 * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
 * * SMDRS:
 * Rd.W[x] = (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]);
 * * SMXDS:
 * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_SMDS(unsigned long a, unsigned long b)
{
    /* Per 32-bit element: (a.top * b.top) - (a.bottom * b.bottom), halves treated as signed. */
    long rd;

    __ASM volatile(
        "smds %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.114.1. SMDS ===== */

/* ===== Inline Function Start for 3.114.2. SMDRS ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
 * \brief SMDRS (SIMD Signed Multiply Two Halfs and Reverse Subtract)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMDS Rd, Rs1, Rs2
 * SMDRS Rd, Rs1, Rs2
 * SMXDS Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
 * perform a subtraction operation between the two 32-bit results.
 * * SMDS: top*top - bottom*bottom (per 32-bit element)
 * * SMDRS: bottom*bottom - top*top (per 32-bit element)
 * * SMXDS: top*bottom - bottom*top (per 32-bit element)
 *
 * **Description**:\n
 * For the `SMDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with
 * the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result
 * of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the
 * 32-bit elements of Rs2.
 * For the `SMDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
 * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of
 * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of
 * the 32-bit elements of Rs2.
 * For the `SMXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
 * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit
 * content of the 32-bit elements of Rs2.
 * The subtraction result is written to the corresponding 32-bit element of Rd. The 16-bit contents of
 * multiplication are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * * SMDS:
 * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
 * * SMDRS:
 * Rd.W[x] = (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]);
 * * SMXDS:
 * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_SMDRS(unsigned long a, unsigned long b)
{
    /* Per 32-bit element: (a.bottom * b.bottom) - (a.top * b.top), halves treated as signed. */
    long rd;

    __ASM volatile(
        "smdrs %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.114.2. SMDRS ===== */

/* ===== Inline Function Start for 3.114.3. SMXDS ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
 * \brief SMXDS (SIMD Signed Crossed Multiply Two Halfs and Subtract)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMDS Rd, Rs1, Rs2
 * SMDRS Rd, Rs1, Rs2
 * SMXDS Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
 * perform a subtraction operation between the two 32-bit results.
 * * SMDS: top*top - bottom*bottom (per 32-bit element)
 * * SMDRS: bottom*bottom - top*top (per 32-bit element)
 * * SMXDS: top*bottom - bottom*top (per 32-bit element)
 *
 * **Description**:\n
 * For the `SMDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with
 * the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result
 * of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the
 * 32-bit elements of Rs2.
 * For the `SMDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
 * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of
 * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of
 * the 32-bit elements of Rs2.
 * For the `SMXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
 * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit
 * content of the 32-bit elements of Rs2.
 * The subtraction result is written to the corresponding 32-bit element of Rd. The 16-bit contents of
 * multiplication are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * * SMDS:
 * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
 * * SMDRS:
 * Rd.W[x] = (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]);
 * * SMXDS:
 * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_SMXDS(unsigned long a, unsigned long b)
{
    /* Per 32-bit element: (a.top * b.bottom) - (a.bottom * b.top), halves treated as signed. */
    long rd;

    __ASM volatile(
        "smxds %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.114.3. SMXDS ===== */

/* ===== Inline Function Start for 3.115. SMIN8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
 * \brief SMIN8 (SIMD 8-bit Signed Minimum)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMIN8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit signed integer elements finding minimum operations simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 8-bit signed integer elements in Rs1 with the 8-bit
 * signed integer elements in Rs2 and selects, for each pair, the element that is less than the
 * other one. The selected results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.B[x] = (Rs1.B[x] < Rs2.B[x])? Rs1.B[x] : Rs2.B[x];
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SMIN8(unsigned long a, unsigned long b)
{
    /* Packed result: each 8-bit lane holds the signed minimum of the matching lanes of a and b. */
    unsigned long rd;

    __ASM volatile(
        "smin8 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.115. SMIN8 ===== */

/* ===== Inline Function Start for 3.116. SMIN16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
 * \brief SMIN16 (SIMD 16-bit Signed Minimum)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMIN16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer elements finding minimum operations simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 16-bit signed integer elements in Rs1 with the 16-bit
 * signed integer elements in Rs2 and selects, for each pair, the element that is less than the
 * other one. The selected results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = (Rs1.H[x] < Rs2.H[x])? Rs1.H[x] : Rs2.H[x];
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SMIN16(unsigned long a, unsigned long b)
{
    /* Packed result: each 16-bit lane holds the signed minimum of the matching lanes of a and b. */
    unsigned long rd;

    __ASM volatile(
        "smin16 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.116. SMIN16 ===== */

/* ===== Inline Function Start for 3.117.1. SMMUL ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
 * \brief SMMUL (SIMD MSW Signed Multiply Word)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMMUL Rd, Rs1, Rs2
 * SMMUL.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the 32-bit signed integer elements of two registers and write the most significant
 * 32-bit results to the corresponding 32-bit elements of a register. The `.u` form performs an
 * additional rounding up operation on the multiplication results before taking the most significant
 * 32-bit part of the results.
 *
 * **Description**:\n
 * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2 and writes the
 * most significant 32-bit multiplication results to the corresponding 32-bit elements of Rd. The 32-bit
 * elements of Rs1 and Rs2 are treated as signed integers. The `.u` form of the instruction rounds up
 * the most significant 32-bit of the 64-bit multiplication results by adding a 1 to bit 31 of the results.
 * * For `smmul/RV32` instruction, it is an alias to `mulh/RV32` instruction.
 *
 * **Operations**:\n
 * ~~~
 * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
 * if (`.u` form) {
 *   Round[x][32:0] = Mres[x][63:31] + 1;
 *   Rd.W[x] = Round[x][32:1];
 * } else {
 *   Rd.W[x] = Mres[x][63:32];
 * }
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_SMMUL(long a, long b)
{
    /* Per 32-bit element: bits [63:32] of the signed 32x32 product, no rounding. */
    long rd;

    __ASM volatile(
        "smmul %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.117.1. SMMUL ===== */

/* ===== Inline Function Start for 3.117.2. SMMUL.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
 * \brief SMMUL.u (SIMD MSW Signed Multiply Word with Rounding)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMMUL Rd, Rs1, Rs2
 * SMMUL.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the 32-bit signed integer elements of two registers and write the most significant
 * 32-bit results to the corresponding 32-bit elements of a register. The `.u` form performs an
 * additional rounding up operation on the multiplication results before taking the most significant
 * 32-bit part of the results.
 *
 * **Description**:\n
 * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2 and writes the
 * most significant 32-bit multiplication results to the corresponding 32-bit elements of Rd. The 32-bit
 * elements of Rs1 and Rs2 are treated as signed integers. The `.u` form of the instruction rounds up
 * the most significant 32-bit of the 64-bit multiplication results by adding a 1 to bit 31 of the results.
 * * For `smmul/RV32` instruction, it is an alias to `mulh/RV32` instruction.
 *
 * **Operations**:\n
 * ~~~
 * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
 * if (`.u` form) {
 *   Round[x][32:0] = Mres[x][63:31] + 1;
 *   Rd.W[x] = Round[x][32:1];
 * } else {
 *   Rd.W[x] = Mres[x][63:32];
 * }
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_SMMUL_U(long a, long b)
{
    /* Rounding (`.u`) form: 1 is added at bit 31 of the 64-bit product before the top word is kept. */
    long rd;

    __ASM volatile(
        "smmul.u %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.117.2. SMMUL.u ===== */

/* ===== Inline Function Start for 3.118.1. SMMWB ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
 * \brief SMMWB (SIMD MSW Signed Multiply Word and Bottom Half)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMMWB Rd, Rs1, Rs2
 * SMMWB.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the
 * corresponding 32-bit elements of another register, and write the most significant 32-bit results to
 * the corresponding 32-bit elements of a register. The `.u` form rounds up the results from the most
 * significant discarded bit.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit elements of Rs1 with the signed bottom 16-bit content
 * of the corresponding 32-bit elements of Rs2 and writes the most significant 32-bit multiplication
 * results to the corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the
 * most significant 32-bit of the 48-bit multiplication results by adding a 1 to bit 15 of the results.
 *
 * **Operations**:\n
 * ~~~
 * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[0];
 * if (`.u` form) {
 *   Round[x][32:0] = Mres[x][47:15] + 1;
 *   Rd.W[x] = Round[x][32:1];
 * } else {
 *   Rd.W[x] = Mres[x][47:16];
 * }
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_SMMWB(long a, unsigned long b)
{
    /* Per 32-bit element: bits [47:16] of (signed word of a) * (signed bottom half of b). */
    long rd;

    __ASM volatile(
        "smmwb %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.118.1. SMMWB ===== */

/* ===== Inline Function Start for 3.118.2. SMMWB.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
 * \brief SMMWB.u (SIMD MSW Signed Multiply Word and Bottom Half with Rounding)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMMWB Rd, Rs1, Rs2
 * SMMWB.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the
 * corresponding 32-bit elements of another register, and write the most significant 32-bit results to
 * the corresponding 32-bit elements of a register. The `.u` form rounds up the results from the most
 * significant discarded bit.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit elements of Rs1 with the signed bottom 16-bit content
 * of the corresponding 32-bit elements of Rs2 and writes the most significant 32-bit multiplication
 * results to the corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the
 * most significant 32-bit of the 48-bit multiplication results by adding a 1 to bit 15 of the results.
 *
 * **Operations**:\n
 * ~~~
 * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[0];
 * if (`.u` form) {
 *   Round[x][32:0] = Mres[x][47:15] + 1;
 *   Rd.W[x] = Round[x][32:1];
 * } else {
 *   Rd.W[x] = Mres[x][47:16];
 * }
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_SMMWB_U(long a, unsigned long b)
{
    /* Rounding (`.u`) form of SMMWB: 1 is added at bit 15 of the 48-bit product before truncation. */
    long rd;

    __ASM volatile(
        "smmwb.u %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.118.2. SMMWB.u ===== */

/* ===== Inline Function Start for 3.119.1. SMMWT ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
 * \brief SMMWT (SIMD MSW Signed Multiply Word and Top Half)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMMWT Rd, Rs1, Rs2
 * SMMWT.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit integer elements of one register and the top 16-bit of the
 * corresponding 32-bit elements of another register, and write the most significant 32-bit results to
 * the corresponding 32-bit elements of a register. The `.u` form rounds up the results from the most
 * significant discarded bit.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit elements of Rs1 with the top signed 16-bit content of
 * the corresponding 32-bit elements of Rs2 and writes the most significant 32-bit multiplication
 * results to the corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the
 * most significant 32-bit of the 48-bit multiplication results by adding a 1 to bit 15 of the results.
 *
 * **Operations**:\n
 * ~~~
 * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[1];
 * if (`.u` form) {
 *   Round[x][32:0] = Mres[x][47:15] + 1;
 *   Rd.W[x] = Round[x][32:1];
 * } else {
 *   Rd.W[x] = Mres[x][47:16];
 * }
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_SMMWT(long a, unsigned long b)
{
    /* Per 32-bit element: bits [47:16] of (signed word of a) * (signed top half of b). */
    long rd;

    __ASM volatile(
        "smmwt %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.119.1. SMMWT ===== */

/* ===== Inline Function Start for 3.119.2. SMMWT.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
 * \brief SMMWT.u (SIMD MSW Signed Multiply Word and Top Half with Rounding)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMMWT Rd, Rs1, Rs2
 * SMMWT.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit integer elements of one register and the top 16-bit of the
 * corresponding 32-bit elements of another register, and write the most significant 32-bit results to
 * the corresponding 32-bit elements of a register. The `.u` form rounds up the results from the most
 * significant discarded bit.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit elements of Rs1 with the top signed 16-bit content of
 * the corresponding 32-bit elements of Rs2 and writes the most significant 32-bit multiplication
 * results to the corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the
 * most significant 32-bit of the 48-bit multiplication results by adding a 1 to bit 15 of the results.
 *
 * **Operations**:\n
 * ~~~
 * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[1];
 * if (`.u` form) {
 *   Round[x][32:0] = Mres[x][47:15] + 1;
 *   Rd.W[x] = Round[x][32:1];
 * } else {
 *   Rd.W[x] = Mres[x][47:16];
 * }
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_SMMWT_U(long a, unsigned long b)
{
    /* Rounding (`.u`) form of SMMWT: 1 is added at bit 15 of the 48-bit product before truncation. */
    long rd;

    __ASM volatile(
        "smmwt.u %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.119.2. SMMWT.u ===== */

/* ===== Inline Function Start for 3.120.1. SMSLDA ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
 * \brief SMSLDA (Signed Multiply Two Halfs & Add & Subtract 64-bit)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * SMSLDA Rd, Rs1, Rs2
 * SMSLXDA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
 * subtracts the two 32-bit results from the 64-bit value of an even/odd pair of registers (RV32) or a
 * register (RV64). The subtraction result is written back to the register-pair.
 * * SMSLDA: rd pair - top*top - bottom*bottom (all 32-bit elements)
 * * SMSLXDA: rd pair - top*bottom - bottom*top (all 32-bit elements)
 *
 * **RV32 Description**:\n
 * For the `SMSLDA` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
 * content of Rs2 and multiplies the top 16-bit content of Rs1 with the top 16-bit content of Rs2.
 * For the `SMSLXDA` instruction, it multiplies the top 16-bit content of Rs1 with the bottom 16-bit
 * content of Rs2 and multiplies the bottom 16-bit content of Rs1 with the top 16-bit content of Rs2.
 * The two multiplication results are subtracted from the 64-bit value of an even/odd pair of registers
 * specified by Rd(4,1). The 64-bit subtraction result is written back to the register-pair. The 16-bit
 * values of Rs1 and Rs2, and the 64-bit value of the register-pair are treated as signed integers.
 * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
 * of the pair contains the low 32-bit of the result.
 *
 * **RV64 Description**:\n
 * For the `SMSLDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of
 * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2.
 * For the `SMSLXDA` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
 * the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the bottom 16-bit content of
 * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2.
 * The four multiplication results are subtracted from the 64-bit value of Rd. The 64-bit subtraction
 * result is written back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated
 * as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * // SMSLDA
 * Mres0[31:0] = (Rs1.H[0] * Rs2.H[0]);
 * Mres1[31:0] = (Rs1.H[1] * Rs2.H[1]);
 * // SMSLXDA
 * Mres0[31:0] = (Rs1.H[0] * Rs2.H[1]);
 * Mres1[31:0] = (Rs1.H[1] * Rs2.H[0]);
 * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
 * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] - SE64(Mres0[31:0]) - SE64(Mres1[31:0]);
 * * RV64:
 * // SMSLDA
 * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
 * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
 * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
 * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
 * // SMSLXDA
 * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
 * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]);
 * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
 * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]);
 * Rd = Rd - SE64(Mres0[0][31:0]) - SE64(Mres1[0][31:0]) - SE64(Mres0[1][31:0]) -
 * SE64(Mres1[1][31:0]);
 * ~~~
 *
 * \param [in]  t    long long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_SMSLDA(long long t, unsigned long a, unsigned long b)
{
    /*
     * t is a read-modify-write operand ("+r"): it supplies the 64-bit
     * accumulator and receives t - top*top - bottom*bottom over all
     * 32-bit elements of a and b.
     */
    __ASM volatile(
        "smslda %0, %1, %2"
        : "+r"(t)
        : "r"(a), "r"(b));

    return t;
}
/* ===== Inline Function End for 3.120.1. SMSLDA ===== */

/* ===== Inline Function Start for 3.120.2. SMSLXDA ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
 * \brief SMSLXDA (Signed Crossed Multiply Two Halfs & Add & Subtract 64-bit)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * SMSLDA Rd, Rs1, Rs2
 * SMSLXDA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
 * subtracts the two 32-bit results from the 64-bit value of an even/odd pair of registers (RV32) or a
 * register (RV64). The subtraction result is written back to the register-pair.
 * * SMSLDA: rd pair - top*top - bottom*bottom (all 32-bit elements)
 * * SMSLXDA: rd pair - top*bottom - bottom*top (all 32-bit elements)
 *
 * **RV32 Description**:\n
 * For the `SMSLDA` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
 * content of Rs2 and multiplies the top 16-bit content of Rs1 with the top 16-bit content of Rs2.
 * For the `SMSLXDA` instruction, it multiplies the top 16-bit content of Rs1 with the bottom 16-bit
 * content of Rs2 and multiplies the bottom 16-bit content of Rs1 with the top 16-bit content of Rs2.
 * The two multiplication results are subtracted from the 64-bit value of an even/odd pair of registers
 * specified by Rd(4,1). The 64-bit subtraction result is written back to the register-pair. The 16-bit
 * values of Rs1 and Rs2, and the 64-bit value of the register-pair are treated as signed integers.
 * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
 * of the pair contains the low 32-bit of the result.
 *
 * **RV64 Description**:\n
 * For the `SMSLDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of
 * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2.
 * For the `SMSLXDA` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
 * the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the bottom 16-bit content of
 * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2.
 * The four multiplication results are subtracted from the 64-bit value of Rd. The 64-bit subtraction
 * result is written back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated
 * as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * // SMSLDA
 * Mres0[31:0] = (Rs1.H[0] * Rs2.H[0]);
 * Mres1[31:0] = (Rs1.H[1] * Rs2.H[1]);
 * // SMSLXDA
 * Mres0[31:0] = (Rs1.H[0] * Rs2.H[1]);
 * Mres1[31:0] = (Rs1.H[1] * Rs2.H[0]);
 * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
 * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] - SE64(Mres0[31:0]) - SE64(Mres1[31:0]);
 * * RV64:
 * // SMSLDA
 * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
 * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
 * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
 * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
 * // SMSLXDA
 * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
 * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]);
 * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
 * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]);
 * Rd = Rd - SE64(Mres0[0][31:0]) - SE64(Mres1[0][31:0]) - SE64(Mres0[1][31:0]) -
 * SE64(Mres1[1][31:0]);
 * ~~~
 *
 * \param [in]  t    long long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_SMSLXDA(long long t, unsigned long a, unsigned long b)
{
    /* "+r": t is the 64-bit accumulator operand, both read and overwritten. */
    __ASM volatile("smslxda %0, %1, %2"
                   : "+r"(t)
                   : "r"(a), "r"(b));

    return t;
}
/* ===== Inline Function End for 3.120.2. SMSLXDA ===== */

/* ===== Inline Function Start for 3.121. SMSR64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
 * \brief SMSR64 (Signed Multiply and Subtract from 64-Bit Data)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * SMSR64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the 32-bit signed elements in two registers and subtract the 64-bit multiplication
 * results from the 64-bit signed data of a pair of registers (RV32) or a register (RV64). The result is
 * written back to the pair of registers (RV32) or a register (RV64).
 *
 * **RV32 Description**:\n
 * This instruction multiplies the 32-bit signed data of Rs1 with that of Rs2. It
 * subtracts the 64-bit multiplication result from the 64-bit signed data of an even/odd pair of registers
 * specified by Rd(4,1). The subtraction result is written back to the even/odd pair of registers
 * specified by Rd(4,1).
 * Rd(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
 * of the pair contains the low 32-bit of the result.
 *
 * **RV64 Description**:\n
 * This instruction multiplies the 32-bit signed elements of Rs1 with that of Rs2. It
 * subtracts the 64-bit multiplication results from the 64-bit signed data of Rd. The subtraction result is
 * written back to Rd.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * R[t_H].R[t_L] = R[t_H].R[t_L] - (Rs1 * Rs2);
 * * RV64:
 * Rd = Rd - (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]);
 * ~~~
 *
 * \param [in]  t    long long type of value stored in t
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_SMSR64(long long t, long a, long b)
{
    /* "+r": t carries the 64-bit value in and receives the updated value. */
    __ASM volatile("smsr64 %0, %1, %2"
                   : "+r"(t)
                   : "r"(a), "r"(b));

    return t;
}
/* ===== Inline Function End for 3.121. SMSR64 ===== */

/* ===== Inline Function Start for 3.122.1. SMUL8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY
 * \brief SMUL8 (SIMD Signed 8-bit Multiply)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMUL8 Rd, Rs1, Rs2
 * SMULX8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do signed 8-bit multiplications and generate four 16-bit results simultaneously.
 *
 * **RV32 Description**:\n
 * For the `SMUL8` instruction, multiply the 8-bit data elements of Rs1 with the
 * corresponding 8-bit data elements of Rs2.
 * For the `SMULX8` instruction, multiply the first and second 8-bit data elements of Rs1 with the
 * second and first 8-bit data elements of Rs2. At the same time, multiply the third and fourth 8-bit data
 * elements of Rs1 with the fourth and third 8-bit data elements of Rs2.
 * The four 16-bit results are then written into an even/odd pair of registers specified by Rd(4,1).
 * Rd(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the two 16-bit results calculated from the top part of
 * Rs1 and the even `2d` register of the pair contains the two 16-bit results calculated from the bottom
 * part of Rs1.
 *
 * **RV64 Description**:\n
 * For the `SMUL8` instruction, multiply the 8-bit data elements of Rs1 with the
 * corresponding 8-bit data elements of Rs2.
 * For the `SMULX8` instruction, multiply the first and second 8-bit data elements of Rs1 with the
 * second and first 8-bit data elements of Rs2. At the same time, multiply the third and fourth 8-bit data
 * elements of Rs1 with the fourth and third 8-bit data elements of Rs2.
 * The four 16-bit results are then written into Rd. The Rd.W[1] contains the two 16-bit results
 * calculated from the top part of Rs1 and the Rd.W[0] contains the two 16-bit results calculated from
 * the bottom part of Rs1.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * if (is `SMUL8`) {
 *   op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
 *   op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
 * } else if (is `SMULX8`) {
 *   op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
 *   op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
 * }
 * rest[x/2] = op1t[x/2] s* op2t[x/2];
 * resb[x/2] = op1b[x/2] s* op2b[x/2];
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * R[t_H].H[1] = rest[1]; R[t_H].H[0] = resb[1];
 * R[t_L].H[1] = rest[0]; R[t_L].H[0] = resb[0];
 * x = 0 and 2
 * * RV64:
 * if (is `SMUL8`) {
 *   op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
 *   op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
 * } else if (is `SMULX8`) {
 *   op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
 *   op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
 * }
 * rest[x/2] = op1t[x/2] s* op2t[x/2];
 * resb[x/2] = op1b[x/2] s* op2b[x/2];
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * Rd.W[1].H[1] = rest[1]; Rd.W[1].H[0] = resb[1];
 * Rd.W[0].H[1] = rest[0]; Rd.W[0].H[0] = resb[0];
 * x = 0 and 2
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_SMUL8(unsigned int a, unsigned int b)
{
    unsigned long long rd;  /* output operand written by smul8 */

    __ASM volatile("smul8 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.122.1. SMUL8 ===== */

/* ===== Inline Function Start for 3.122.2. SMULX8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY
 * \brief SMULX8 (SIMD Signed Crossed 8-bit Multiply)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMUL8 Rd, Rs1, Rs2
 * SMULX8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do signed 8-bit multiplications and generate four 16-bit results simultaneously.
 *
 * **RV32 Description**:\n
 * For the `SMUL8` instruction, multiply the 8-bit data elements of Rs1 with the
 * corresponding 8-bit data elements of Rs2.
 * For the `SMULX8` instruction, multiply the first and second 8-bit data elements of Rs1 with the
 * second and first 8-bit data elements of Rs2. At the same time, multiply the third and fourth 8-bit data
 * elements of Rs1 with the fourth and third 8-bit data elements of Rs2.
 * The four 16-bit results are then written into an even/odd pair of registers specified by Rd(4,1).
 * Rd(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the two 16-bit results calculated from the top part of
 * Rs1 and the even `2d` register of the pair contains the two 16-bit results calculated from the bottom
 * part of Rs1.
 *
 * **RV64 Description**:\n
 * For the `SMUL8` instruction, multiply the 8-bit data elements of Rs1 with the
 * corresponding 8-bit data elements of Rs2.
 * For the `SMULX8` instruction, multiply the first and second 8-bit data elements of Rs1 with the
 * second and first 8-bit data elements of Rs2. At the same time, multiply the third and fourth 8-bit data
 * elements of Rs1 with the fourth and third 8-bit data elements of Rs2.
 * The four 16-bit results are then written into Rd. The Rd.W[1] contains the two 16-bit results
 * calculated from the top part of Rs1 and the Rd.W[0] contains the two 16-bit results calculated from
 * the bottom part of Rs1.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * if (is `SMUL8`) {
 *   op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
 *   op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
 * } else if (is `SMULX8`) {
 *   op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
 *   op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
 * }
 * rest[x/2] = op1t[x/2] s* op2t[x/2];
 * resb[x/2] = op1b[x/2] s* op2b[x/2];
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * R[t_H].H[1] = rest[1]; R[t_H].H[0] = resb[1];
 * R[t_L].H[1] = rest[0]; R[t_L].H[0] = resb[0];
 * x = 0 and 2
 * * RV64:
 * if (is `SMUL8`) {
 *   op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
 *   op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
 * } else if (is `SMULX8`) {
 *   op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
 *   op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
 * }
 * rest[x/2] = op1t[x/2] s* op2t[x/2];
 * resb[x/2] = op1b[x/2] s* op2b[x/2];
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * Rd.W[1].H[1] = rest[1]; Rd.W[1].H[0] = resb[1];
 * Rd.W[0].H[1] = rest[0]; Rd.W[0].H[0] = resb[0];
 * x = 0 and 2
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_SMULX8(unsigned int a, unsigned int b)
{
    unsigned long long rd;  /* output operand written by smulx8 */

    __ASM volatile("smulx8 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.122.2. SMULX8 ===== */

/* ===== Inline Function Start for 3.123.1. SMUL16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY
 * \brief SMUL16 (SIMD Signed 16-bit Multiply)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMUL16 Rd, Rs1, Rs2
 * SMULX16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do signed 16-bit multiplications and generate two 32-bit results simultaneously.
 *
 * **RV32 Description**:\n
 * For the `SMUL16` instruction, multiply the top 16-bit Q15 content of Rs1 with
 * the top 16-bit Q15 content of Rs2. At the same time, multiply the bottom 16-bit Q15 content of Rs1
 * with the bottom 16-bit Q15 content of Rs2.
 * For the `SMULX16` instruction, multiply the top 16-bit Q15 content of Rs1 with the bottom 16-bit
 * Q15 content of Rs2. At the same time, multiply the bottom 16-bit Q15 content of Rs1 with the top 16-
 * bit Q15 content of Rs2.
 * The two Q30 results are then written into an even/odd pair of registers specified by Rd(4,1). Rd(4,1),
 * i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair includes
 * register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the 32-bit result calculated from the top part of Rs1 and
 * the even `2d` register of the pair contains the 32-bit result calculated from the bottom part of Rs1.
 *
 * **RV64 Description**:\n
 * For the `SMUL16` instruction, multiply the top 16-bit Q15 content of the lower
 * 32-bit word in Rs1 with the top 16-bit Q15 content of the lower 32-bit word in Rs2. At the same time,
 * multiply the bottom 16-bit Q15 content of the lower 32-bit word in Rs1 with the bottom 16-bit Q15
 * content of the lower 32-bit word in Rs2.
 * For the `SMULX16` instruction, multiply the top 16-bit Q15 content of the lower 32-bit word in Rs1
 * with the bottom 16-bit Q15 content of the lower 32-bit word in Rs2. At the same time, multiply the
 * bottom 16-bit Q15 content of the lower 32-bit word in Rs1 with the top 16-bit Q15 content of the
 * lower 32-bit word in Rs2.
 * The two 32-bit Q30 results are then written into Rd. The result calculated from the top 16-bit of the
 * lower 32-bit word in Rs1 is written to Rd.W[1]. And the result calculated from the bottom 16-bit of
 * the lower 32-bit word in Rs1 is written to Rd.W[0].
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * if (is `SMUL16`) {
 *   op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
 *   op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
 * } else if (is `SMULX16`) {
 *   op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
 *   op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
 * }
 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
 *   res = aop s* bop;
 * }
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * R[t_H] = rest;
 * R[t_L] = resb;
 * * RV64:
 * if (is `SMUL16`) {
 *   op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
 *   op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
 * } else if (is `SMULX16`) {
 *   op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
 *   op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
 * }
 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
 *   res = aop s* bop;
 * }
 * Rd.W[1] = rest;
 * Rd.W[0] = resb;
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_SMUL16(unsigned int a, unsigned int b)
{
    unsigned long long rd;  /* output operand written by smul16 */

    __ASM volatile("smul16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.123.1. SMUL16 ===== */

/* ===== Inline Function Start for 3.123.2. SMULX16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY
 * \brief SMULX16 (SIMD Signed Crossed 16-bit Multiply)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SMUL16 Rd, Rs1, Rs2
 * SMULX16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do signed 16-bit multiplications and generate two 32-bit results simultaneously.
 *
 * **RV32 Description**:\n
 * For the `SMUL16` instruction, multiply the top 16-bit Q15 content of Rs1 with
 * the top 16-bit Q15 content of Rs2. At the same time, multiply the bottom 16-bit Q15 content of Rs1
 * with the bottom 16-bit Q15 content of Rs2.
 * For the `SMULX16` instruction, multiply the top 16-bit Q15 content of Rs1 with the bottom 16-bit
 * Q15 content of Rs2. At the same time, multiply the bottom 16-bit Q15 content of Rs1 with the top 16-
 * bit Q15 content of Rs2.
 * The two Q30 results are then written into an even/odd pair of registers specified by Rd(4,1). Rd(4,1),
 * i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair includes
 * register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the 32-bit result calculated from the top part of Rs1 and
 * the even `2d` register of the pair contains the 32-bit result calculated from the bottom part of Rs1.
 *
 * **RV64 Description**:\n
 * For the `SMUL16` instruction, multiply the top 16-bit Q15 content of the lower
 * 32-bit word in Rs1 with the top 16-bit Q15 content of the lower 32-bit word in Rs2. At the same time,
 * multiply the bottom 16-bit Q15 content of the lower 32-bit word in Rs1 with the bottom 16-bit Q15
 * content of the lower 32-bit word in Rs2.
 * For the `SMULX16` instruction, multiply the top 16-bit Q15 content of the lower 32-bit word in Rs1
 * with the bottom 16-bit Q15 content of the lower 32-bit word in Rs2. At the same time, multiply the
 * bottom 16-bit Q15 content of the lower 32-bit word in Rs1 with the top 16-bit Q15 content of the
 * lower 32-bit word in Rs2.
 * The two 32-bit Q30 results are then written into Rd. The result calculated from the top 16-bit of the
 * lower 32-bit word in Rs1 is written to Rd.W[1]. And the result calculated from the bottom 16-bit of
 * the lower 32-bit word in Rs1 is written to Rd.W[0].
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * if (is `SMUL16`) {
 *   op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
 *   op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
 * } else if (is `SMULX16`) {
 *   op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
 *   op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
 * }
 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
 *   res = aop s* bop;
 * }
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * R[t_H] = rest;
 * R[t_L] = resb;
 * * RV64:
 * if (is `SMUL16`) {
 *   op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
 *   op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
 * } else if (is `SMULX16`) {
 *   op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
 *   op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
 * }
 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
 *   res = aop s* bop;
 * }
 * Rd.W[1] = rest;
 * Rd.W[0] = resb;
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_SMULX16(unsigned int a, unsigned int b)
{
    unsigned long long rd;  /* output operand written by smulx16 */

    __ASM volatile("smulx16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.123.2. SMULX16 ===== */

/* ===== Inline Function Start for 3.124. SRA.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
 * \brief SRA.u (Rounding Shift Right Arithmetic)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * SRA.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Perform an arithmetic right shift operation with rounding. The shift amount is a variable
 * from a GPR.
 *
 * **Description**:\n
 * This instruction right-shifts the content of Rs1 arithmetically. The shifted out bits are
 * filled with the sign-bit and the shift amount is specified by the low-order 5-bits (RV32) or 6-bits
 * (RV64) of the Rs2 register. For the rounding operation, a value of 1 is added to the most significant
 * discarded bit of the data to calculate the final result. And the result is written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * sa = Rs2[4:0];
 * if (sa > 0) {
 *   res[31:-1] = SE33(Rs1[31:(sa-1)]) + 1;
 *   Rd = res[31:0];
 * } else {
 *   Rd = Rs1;
 * }
 * * RV64:
 * sa = Rs2[5:0];
 * if (sa > 0) {
 *   res[63:-1] = SE65(Rs1[63:(sa-1)]) + 1;
 *   Rd = res[63:0];
 * } else {
 *   Rd = Rs1;
 * }
 * ~~~
 *
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_SRA_U(long a, unsigned int b)
{
    long rd;  /* output operand written by sra.u */

    /* a is the value to shift; b is passed in a register as the shift amount. */
    __ASM volatile("sra.u %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.124. SRA.u ===== */

/* ===== Inline Function Start for 3.125. SRAI.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
 * \brief SRAI.u (Rounding Shift Right Arithmetic Immediate)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * SRAI.u Rd, Rs1, imm6u[4:0] (RV32)
 * SRAI.u Rd, Rs1, imm6u[5:0] (RV64)
 * ~~~
 *
 * **Purpose**:\n
 * Perform an arithmetic right shift operation with rounding. The shift amount is an
 * immediate value.
 *
 * **Description**:\n
 * This instruction right-shifts the content of Rs1 arithmetically. The shifted out bits are
 * filled with the sign-bit and the shift amount is specified by the imm6u[4:0] (RV32) or imm6u[5:0]
 * (RV64) constant. For the rounding operation, a value of 1 is added to the most significant discarded
 * bit of the data to calculate the final result. And the result is written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * sa = imm6u[4:0];
 * if (sa > 0) {
 *   res[31:-1] = SE33(Rs1[31:(sa-1)]) + 1;
 *   Rd = res[31:0];
 * } else {
 *   Rd = Rs1;
 * }
 * * RV64:
 * sa = imm6u[5:0];
 * if (sa > 0) {
 *   res[63:-1] = SE65(Rs1[63:(sa-1)]) + 1;
 *   Rd = res[63:0];
 * } else {
 *   Rd = Rs1;
 * }
 * ~~~
 *
 * \param [in]  a    long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in long type
 */
#define __RV_SRAI_U(a, b)    \
    ({    \
        /* Evaluate the argument before the output temporary exists, and */    \
        /* keep the temporary out of the caller namespace, so that a call */    \
        /* such as __RV_SRAI_U(result, n) reads the caller variable. */    \
        long __a = (long)(a);    \
        long __result;    \
        /* b is handed to the assembler as an immediate ("K" constraint). */    \
        __ASM volatile("srai.u %0, %1, %2" : "=r"(__result) : "r"(__a), "K"(b));    \
        __result;    \
    })
/* ===== Inline Function End for 3.125. SRAI.u ===== */

/* ===== Inline Function Start for 3.126.1. SRA8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
 * \brief SRA8 (SIMD 8-bit Shift Right Arithmetic)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SRA8 Rd, Rs1, Rs2
 * SRA8.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit element arithmetic right shift operations simultaneously. The shift amount is a
 * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
 * results.
 *
 * **Description**:\n
 * The 8-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
 * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order
 * 3-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is
 * added to the most significant discarded bit of each 8-bit data element to calculate the final results.
 * And the results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = Rs2[2:0];
 * if (sa > 0) {
 *   if (`.u` form) { // SRA8.u
 *     res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1;
 *     Rd.B[x] = res[7:0];
 *   } else { // SRA8
 *     Rd.B[x] = SE8(Rs1.B[x][7:sa])
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SRA8(unsigned long a, unsigned int b)
{
    unsigned long rd;  /* output operand written by sra8 */

    /* a holds the packed elements; b is passed in a register as the shift amount. */
    __ASM volatile("sra8 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.126.1. SRA8 ===== */

/* ===== Inline Function Start for 3.126.2. SRA8.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
 * \brief SRA8.u (SIMD 8-bit Rounding Shift Right Arithmetic)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SRA8 Rd, Rs1, Rs2
 * SRA8.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit element arithmetic right shift operations simultaneously. The shift amount is a
 * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
 * results.
 *
 * **Description**:\n
 * The 8-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
 * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order
 * 3-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is
 * added to the most significant discarded bit of each 8-bit data element to calculate the final results.
 * And the results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = Rs2[2:0];
 * if (sa > 0) {
 *   if (`.u` form) { // SRA8.u
 *     res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1;
 *     Rd.B[x] = res[7:0];
 *   } else { // SRA8
 *     Rd.B[x] = SE8(Rs1.B[x][7:sa])
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SRA8_U(unsigned long a, unsigned int b)
{
    unsigned long rd;  /* output operand written by sra8.u */

    /* a holds the packed elements; b is passed in a register as the shift amount. */
    __ASM volatile("sra8.u %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.126.2. SRA8.u ===== */

/* ===== Inline Function Start for 3.127.1. SRAI8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
 * \brief SRAI8 (SIMD 8-bit Shift Right Arithmetic Immediate)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SRAI8 Rd, Rs1, imm3u
 * SRAI8.u Rd, Rs1, imm3u
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit element arithmetic right shift operations simultaneously. The shift amount is an
 * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
 *
 * **Description**:\n
 * The 8-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
 * bits are filled with the sign-bit of the data elements. The shift amount is specified by the imm3u
 * constant. For the rounding operation of the `.u` form, a value of 1 is added to the most significant
 * discarded bit of each 8-bit data element to calculate the final results. And the results are written to
 * Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = imm3u[2:0];
 * if (sa > 0) {
 *   if (`.u` form) { // SRAI8.u
 *     res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1;
 *     Rd.B[x] = res[7:0];
 *   } else { // SRAI8
 *     Rd.B[x] = SE8(Rs1.B[x][7:sa])
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
#define __RV_SRAI8(a, b)    \
    ({    \
        /* Evaluate the argument before the output temporary exists, and */    \
        /* keep the temporary out of the caller namespace, so that a call */    \
        /* such as __RV_SRAI8(result, n) reads the caller variable. */    \
        unsigned long __a = (unsigned long)(a);    \
        unsigned long __result;    \
        /* b is handed to the assembler as an immediate ("K" constraint). */    \
        __ASM volatile("srai8 %0, %1, %2" : "=r"(__result) : "r"(__a), "K"(b));    \
        __result;    \
    })
/* ===== Inline Function End for 3.127.1. SRAI8 ===== */

/* ===== Inline Function Start for 3.127.2. SRAI8.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
 * \brief SRAI8.u (SIMD 8-bit Rounding Shift Right Arithmetic Immediate)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SRAI8 Rd, Rs1, imm3u
 * SRAI8.u Rd, Rs1, imm3u
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit element arithmetic right shift operations simultaneously. The shift amount is an
 * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
 *
 * **Description**:\n
 * The 8-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
 * bits are filled with the sign-bit of the data elements. The shift amount is specified by the imm3u
 * constant. For the rounding operation of the `.u` form, a value of 1 is added to the most significant
 * discarded bit of each 8-bit data element to calculate the final results. And the results are written to
 * Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = imm3u[2:0];
 * if (sa > 0) {
 *   if (`.u` form) { // SRAI8.u
 *     res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1;
 *     Rd.B[x] = res[7:0];
 *   } else { // SRAI8
 *     Rd.B[x] = SE8(Rs1.B[x][7:sa])
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
#define __RV_SRAI8_U(a, b)    \
    ({    \
        /* Evaluate the argument before the output temporary exists, and */    \
        /* keep the temporary out of the caller namespace, so that a call */    \
        /* such as __RV_SRAI8_U(result, n) reads the caller variable. */    \
        unsigned long __a = (unsigned long)(a);    \
        unsigned long __result;    \
        /* b is handed to the assembler as an immediate ("K" constraint). */    \
        __ASM volatile("srai8.u %0, %1, %2" : "=r"(__result) : "r"(__a), "K"(b));    \
        __result;    \
    })
/* ===== Inline Function End for 3.127.2. SRAI8.u ===== */

/* ===== Inline Function Start for 3.128.1. SRA16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
 * \brief SRA16 (SIMD 16-bit Shift Right Arithmetic)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SRA16 Rd, Rs1, Rs2
 * SRA16.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit element arithmetic right shift operations simultaneously. The shift amount is a
 * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
 * results.
 *
 * **Description**:\n
 * The 16-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
 * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order
 * 4-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is
 * added to the most significant discarded bit of each 16-bit data element to calculate the final results.
 * And the results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = Rs2[3:0];
 * if (sa != 0) {
 *   if (`.u` form) { // SRA16.u
 *     res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1;
 *     Rd.H[x] = res[15:0];
 *   } else { // SRA16
 *     Rd.H[x] = SE16(Rs1.H[x][15:sa])
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SRA16(unsigned long a, unsigned long b)
{
    unsigned long rd;  /* output operand written by sra16 */

    /* a holds the packed elements; b is passed in a register as the shift amount. */
    __ASM volatile("sra16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.128.1. SRA16 ===== */

/* ===== Inline Function Start for 3.128.2. SRA16.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
 * \brief SRA16.u (SIMD 16-bit Rounding Shift Right Arithmetic)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SRA16 Rd, Rs1, Rs2
 * SRA16.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit element arithmetic right shift operations simultaneously. The shift amount is a
 * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
 * results.
 *
 * **Description**:\n
 * The 16-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
 * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order
 * 4-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is
 * added to the most significant discarded bit of each 16-bit data element to calculate the final results.
 * And the results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = Rs2[3:0];
 * if (sa != 0) {
 *   if (`.u` form) { // SRA16.u
 *     res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1;
 *     Rd.H[x] = res[15:0];
 *   } else { // SRA16
 *     Rd.H[x] = SE16(Rs1.H[x][15:sa])
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SRA16_U(unsigned long a, unsigned long b)
{
    unsigned long rd;  /* output operand written by sra16.u */

    /* a holds the packed elements; b is passed in a register as the shift amount. */
    __ASM volatile("sra16.u %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.128.2. SRA16.u ===== */

/* ===== Inline Function Start for 3.129.1. SRAI16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
 * \brief SRAI16 (SIMD 16-bit Shift Right Arithmetic Immediate)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SRAI16 Rd, Rs1, imm4u
 * SRAI16.u Rd, Rs1, imm4u
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit elements arithmetic right shift operations simultaneously. The shift amount is
 * an immediate value. The `.u` form performs additional rounding up operations on the shifted
 * results.
 *
 * **Description**:\n
 * The 16-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
 * bits are filled with the sign-bit of the 16-bit data elements. The shift amount is specified by the
 * imm4u constant. For the rounding operation of the `.u` form, a value of 1 is added to the most
 * significant discarded bit of each 16-bit data to calculate the final results. And the results are written
 * to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = imm4u[3:0];
 * if (sa > 0) {
 *   if (`.u` form) { // SRAI16.u
 *     res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1;
 *     Rd.H[x] = res[15:0];
 *   } else { // SRAI16
 *     Rd.H[x] = SE16(Rs1.H[x][15:sa]);
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
#define __RV_SRAI16(a, b)    \
    ({    \
        /* Read `a` before declaring the output temporary, so an argument */    \
        /* that names a caller variable `result` is not shadowed by it.   */    \
        unsigned long __a = (unsigned long)(a);    \
        unsigned long __result;    \
        /* "K" binds `b` as an immediate operand (the imm4u shift amount). */    \
        __ASM volatile("srai16 %0, %1, %2" : "=r"(__result) : "r"(__a), "K"(b));    \
        __result;    \
    })
/* ===== Inline Function End for 3.129.1. SRAI16 ===== */

/* ===== Inline Function Start for 3.129.2. SRAI16.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
 * \brief SRAI16.u (SIMD 16-bit Rounding Shift Right Arithmetic Immediate)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SRAI16 Rd, Rs1, imm4u
 * SRAI16.u Rd, Rs1, imm4u
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit elements arithmetic right shift operations simultaneously. The shift amount is
 * an immediate value. The `.u` form performs additional rounding up operations on the shifted
 * results.
 *
 * **Description**:\n
 * The 16-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
 * bits are filled with the sign-bit of the 16-bit data elements. The shift amount is specified by the
 * imm4u constant. For the rounding operation of the `.u` form, a value of 1 is added to the most
 * significant discarded bit of each 16-bit data to calculate the final results. And the results are written
 * to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = imm4u[3:0];
 * if (sa > 0) {
 *   if (`.u` form) { // SRAI16.u
 *     res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1;
 *     Rd.H[x] = res[15:0];
 *   } else { // SRAI16
 *     Rd.H[x] = SE16(Rs1.H[x][15:sa]);
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
#define __RV_SRAI16_U(a, b)    \
    ({    \
        /* Read `a` before declaring the output temporary, so an argument */    \
        /* that names a caller variable `result` is not shadowed by it.   */    \
        unsigned long __a = (unsigned long)(a);    \
        unsigned long __result;    \
        /* "K" binds `b` as an immediate operand (the imm4u shift amount). */    \
        __ASM volatile("srai16.u %0, %1, %2" : "=r"(__result) : "r"(__a), "K"(b));    \
        __result;    \
    })
/* ===== Inline Function End for 3.129.2. SRAI16.u ===== */

/* ===== Inline Function Start for 3.130.1. SRL8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
 * \brief SRL8 (SIMD 8-bit Shift Right Logical)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SRL8 Rd, Rs1, Rs2
 * SRL8.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit elements logical right shift operations simultaneously. The shift amount is a
 * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
 * results.
 *
 * **Description**:\n
 * The 8-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits are
 * filled with zero. The shift amount is specified by the low-order 3-bits of the value in the Rs2 register.
 * For the rounding operation of the `.u` form, a value of 1 is added to the most significant discarded
 * bit of each 8-bit data element to calculate the final results. And the results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = Rs2[2:0];
 * if (sa > 0) {
 *   if (`.u` form) { // SRL8.u
 *     res[8:0] = ZE9(Rs1.B[x][7:sa-1]) + 1;
 *     Rd.B[x] = res[8:1];
 *   } else { // SRL8
 *     Rd.B[x] = ZE8(Rs1.B[x][7:sa]);
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SRL8(unsigned long a, unsigned int b)
{
    unsigned long rd; /* bound to output operand %0 of the instruction */

    /* %1 <- a (packed data), %2 <- b (shift amount register). */
    __ASM volatile("srl8 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.130.1. SRL8 ===== */

/* ===== Inline Function Start for 3.130.2. SRL8.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
 * \brief SRL8.u (SIMD 8-bit Rounding Shift Right Logical)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SRL8 Rd, Rs1, Rs2
 * SRL8.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit elements logical right shift operations simultaneously. The shift amount is a
 * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
 * results.
 *
 * **Description**:\n
 * The 8-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits are
 * filled with zero. The shift amount is specified by the low-order 3-bits of the value in the Rs2 register.
 * For the rounding operation of the `.u` form, a value of 1 is added to the most significant discarded
 * bit of each 8-bit data element to calculate the final results. And the results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = Rs2[2:0];
 * if (sa > 0) {
 *   if (`.u` form) { // SRL8.u
 *     res[8:0] = ZE9(Rs1.B[x][7:sa-1]) + 1;
 *     Rd.B[x] = res[8:1];
 *   } else { // SRL8
 *     Rd.B[x] = ZE8(Rs1.B[x][7:sa]);
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SRL8_U(unsigned long a, unsigned int b)
{
    unsigned long rd; /* bound to output operand %0 of the instruction */

    /* %1 <- a (packed data), %2 <- b (shift amount register). */
    __ASM volatile("srl8.u %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.130.2. SRL8.u ===== */

/* ===== Inline Function Start for 3.131.1. SRLI8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
 * \brief SRLI8 (SIMD 8-bit Shift Right Logical Immediate)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SRLI8 Rd, Rs1, imm3u
 * SRLI8.u Rd, Rs1, imm3u
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit elements logical right shift operations simultaneously. The shift amount is an
 * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
 *
 * **Description**:\n
 * The 8-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits are
 * filled with zero. The shift amount is specified by the imm3u constant. For the rounding operation of
 * the `.u` form, a value of 1 is added to the most significant discarded bit of each 8-bit data element to
 * calculate the final results. And the results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = imm3u[2:0];
 * if (sa > 0) {
 *   if (`.u` form) { // SRLI8.u
 *     res[8:0] = ZE9(Rs1.B[x][7:sa-1]) + 1;
 *     Rd.B[x] = res[8:1];
 *   } else { // SRLI8
 *     Rd.B[x] = ZE8(Rs1.B[x][7:sa]);
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
#define __RV_SRLI8(a, b)    \
    ({    \
        /* Read `a` before declaring the output temporary, so an argument */    \
        /* that names a caller variable `result` is not shadowed by it.   */    \
        unsigned long __a = (unsigned long)(a);    \
        unsigned long __result;    \
        /* "K" binds `b` as an immediate operand (the imm3u shift amount). */    \
        __ASM volatile("srli8 %0, %1, %2" : "=r"(__result) : "r"(__a), "K"(b));    \
        __result;    \
    })
/* ===== Inline Function End for 3.131.1. SRLI8 ===== */

/* ===== Inline Function Start for 3.131.2. SRLI8.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
 * \brief SRLI8.u (SIMD 8-bit Rounding Shift Right Logical Immediate)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SRLI8 Rd, Rs1, imm3u
 * SRLI8.u Rd, Rs1, imm3u
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit elements logical right shift operations simultaneously. The shift amount is an
 * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
 *
 * **Description**:\n
 * The 8-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits are
 * filled with zero. The shift amount is specified by the imm3u constant. For the rounding operation of
 * the `.u` form, a value of 1 is added to the most significant discarded bit of each 8-bit data element to
 * calculate the final results. And the results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = imm3u[2:0];
 * if (sa > 0) {
 *   if (`.u` form) { // SRLI8.u
 *     res[8:0] = ZE9(Rs1.B[x][7:sa-1]) + 1;
 *     Rd.B[x] = res[8:1];
 *   } else { // SRLI8
 *     Rd.B[x] = ZE8(Rs1.B[x][7:sa]);
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
#define __RV_SRLI8_U(a, b)    \
    ({    \
        /* Read `a` before declaring the output temporary, so an argument */    \
        /* that names a caller variable `result` is not shadowed by it.   */    \
        unsigned long __a = (unsigned long)(a);    \
        unsigned long __result;    \
        /* "K" binds `b` as an immediate operand (the imm3u shift amount). */    \
        __ASM volatile("srli8.u %0, %1, %2" : "=r"(__result) : "r"(__a), "K"(b));    \
        __result;    \
    })
/* ===== Inline Function End for 3.131.2. SRLI8.u ===== */

/* ===== Inline Function Start for 3.132.1. SRL16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
 * \brief SRL16 (SIMD 16-bit Shift Right Logical)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SRL16 Rd, Rs1, Rs2
 * SRL16.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit elements logical right shift operations simultaneously. The shift amount is a
 * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
 * results.
 *
 * **Description**:\n
 * The 16-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
 * are filled with zero. The shift amount is specified by the low-order 4-bits of the value in the Rs2
 * register. For the rounding operation of the `.u` form, a value of 1 is added to the most significant
 * discarded bit of each 16-bit data element to calculate the final results. And the results are written to
 * Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = Rs2[3:0];
 * if (sa > 0) {
 *   if (`.u` form) { // SRL16.u
 *     res[16:0] = ZE17(Rs1.H[x][15:sa-1]) + 1;
 *     Rd.H[x] = res[16:1];
 *   } else { // SRL16
 *     Rd.H[x] = ZE16(Rs1.H[x][15:sa]);
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SRL16(unsigned long a, unsigned int b)
{
    unsigned long rd; /* bound to output operand %0 of the instruction */

    /* %1 <- a (packed data), %2 <- b (shift amount register). */
    __ASM volatile("srl16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.132.1. SRL16 ===== */

/* ===== Inline Function Start for 3.132.2. SRL16.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
 * \brief SRL16.u (SIMD 16-bit Rounding Shift Right Logical)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SRL16 Rd, Rs1, Rs2
 * SRL16.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit elements logical right shift operations simultaneously. The shift amount is a
 * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
 * results.
 *
 * **Description**:\n
 * The 16-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
 * are filled with zero. The shift amount is specified by the low-order 4-bits of the value in the Rs2
 * register. For the rounding operation of the `.u` form, a value of 1 is added to the most significant
 * discarded bit of each 16-bit data element to calculate the final results. And the results are written to
 * Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = Rs2[3:0];
 * if (sa > 0) {
 *   if (`.u` form) { // SRL16.u
 *     res[16:0] = ZE17(Rs1.H[x][15:sa-1]) + 1;
 *     Rd.H[x] = res[16:1];
 *   } else { // SRL16
 *     Rd.H[x] = ZE16(Rs1.H[x][15:sa]);
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SRL16_U(unsigned long a, unsigned int b)
{
    unsigned long rd; /* bound to output operand %0 of the instruction */

    /* %1 <- a (packed data), %2 <- b (shift amount register). */
    __ASM volatile("srl16.u %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.132.2. SRL16.u ===== */

/* ===== Inline Function Start for 3.133.1. SRLI16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
 * \brief SRLI16 (SIMD 16-bit Shift Right Logical Immediate)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SRLI16 Rd, Rs1, imm4u
 * SRLI16.u Rd, Rs1, imm4u
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit elements logical right shift operations simultaneously. The shift amount is an
 * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
 *
 * **Description**:\n
 * The 16-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
 * are filled with zero. The shift amount is specified by the imm4u constant. For the rounding
 * operation of the `.u` form, a value of 1 is added to the most significant discarded bit of each 16-bit
 * data element to calculate the final results. And the results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = imm4u;
 * if (sa > 0) {
 *   if (`.u` form) { // SRLI16.u
 *     res[16:0] = ZE17(Rs1.H[x][15:sa-1]) + 1;
 *     Rd.H[x] = res[16:1];
 *   } else { // SRLI16
 *     Rd.H[x] = ZE16(Rs1.H[x][15:sa]);
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
#define __RV_SRLI16(a, b)    \
    ({    \
        /* Read `a` before declaring the output temporary, so an argument */    \
        /* that names a caller variable `result` is not shadowed by it.   */    \
        unsigned long __a = (unsigned long)(a);    \
        unsigned long __result;    \
        /* "K" binds `b` as an immediate operand (the imm4u shift amount). */    \
        __ASM volatile("srli16 %0, %1, %2" : "=r"(__result) : "r"(__a), "K"(b));    \
        __result;    \
    })
/* ===== Inline Function End for 3.133.1. SRLI16 ===== */

/* ===== Inline Function Start for 3.133.2. SRLI16.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
 * \brief SRLI16.u (SIMD 16-bit Rounding Shift Right Logical Immediate)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SRLI16 Rd, Rs1, imm4u
 * SRLI16.u Rd, Rs1, imm4u
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit elements logical right shift operations simultaneously. The shift amount is an
 * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
 *
 * **Description**:\n
 * The 16-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
 * are filled with zero. The shift amount is specified by the imm4u constant. For the rounding
 * operation of the `.u` form, a value of 1 is added to the most significant discarded bit of each 16-bit
 * data element to calculate the final results. And the results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = imm4u;
 * if (sa > 0) {
 *   if (`.u` form) { // SRLI16.u
 *     res[16:0] = ZE17(Rs1.H[x][15:sa-1]) + 1;
 *     Rd.H[x] = res[16:1];
 *   } else { // SRLI16
 *     Rd.H[x] = ZE16(Rs1.H[x][15:sa]);
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
#define __RV_SRLI16_U(a, b)    \
    ({    \
        /* Read `a` before declaring the output temporary, so an argument */    \
        /* that names a caller variable `result` is not shadowed by it.   */    \
        unsigned long __a = (unsigned long)(a);    \
        unsigned long __result;    \
        /* "K" binds `b` as an immediate operand (the imm4u shift amount). */    \
        __ASM volatile("srli16.u %0, %1, %2" : "=r"(__result) : "r"(__a), "K"(b));    \
        __result;    \
    })
/* ===== Inline Function End for 3.133.2. SRLI16.u ===== */

/* ===== Inline Function Start for 3.134. STAS16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief STAS16 (SIMD 16-bit Straight Addition & Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * STAS16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit integer element addition and 16-bit integer element subtraction in a 32-bit
 * chunk simultaneously. Operands are from corresponding positions in 32-bit chunks.
 *
 * **Description**:\n
 * This instruction adds the 16-bit integer element in [31:16] of 32-bit chunks in Rs1 with
 * the 16-bit integer element in [31:16] of 32-bit chunks in Rs2, and writes the result to [31:16] of 32-bit
 * chunks in Rd; at the same time, it subtracts the 16-bit integer element in [15:0] of 32-bit chunks in
 * Rs2 from the 16-bit integer element in [15:0] of 32-bit chunks in Rs1, and writes the result to
 * [15:0] of 32-bit chunks in Rd.
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned operations.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:16] = Rs1.W[x][31:16] + Rs2.W[x][31:16];
 * Rd.W[x][15:0] = Rs1.W[x][15:0] - Rs2.W[x][15:0];
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_STAS16(unsigned long a, unsigned long b)
{
    unsigned long rd; /* bound to output operand %0 of the instruction */

    /* %1 <- a, %2 <- b; both are passed in general-purpose registers. */
    __ASM volatile("stas16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.134. STAS16 ===== */

/* ===== Inline Function Start for 3.135. STSA16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief STSA16 (SIMD 16-bit Straight Subtraction & Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * STSA16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit integer element subtraction and 16-bit integer element addition in a 32-bit
 * chunk simultaneously. Operands are from corresponding positions in 32-bit chunks.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit integer element in [31:16] of 32-bit chunks in Rs2
 * from the 16-bit integer element in [31:16] of 32-bit chunks in Rs1, and writes the result to [31:16] of
 * 32-bit chunks in Rd; at the same time, it adds the 16-bit integer element in [15:0] of 32-bit chunks in
 * Rs2 with the 16-bit integer element in [15:0] of 32-bit chunks in Rs1, and writes the result to [15:0] of
 * 32-bit chunks in Rd.
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned operations.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:16] = Rs1.W[x][31:16] - Rs2.W[x][31:16];
 * Rd.W[x][15:0] = Rs1.W[x][15:0] + Rs2.W[x][15:0];
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_STSA16(unsigned long a, unsigned long b)
{
    unsigned long rd; /* bound to output operand %0 of the instruction */

    /* %1 <- a, %2 <- b; both are passed in general-purpose registers. */
    __ASM volatile("stsa16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.135. STSA16 ===== */

/* ===== Inline Function Start for 3.136. SUB8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
 * \brief SUB8 (SIMD 8-bit Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SUB8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit integer element subtractions simultaneously.
 *
 * **Description**:\n
 * This instruction subtracts the 8-bit integer elements in Rs2 from the 8-bit integer
 * elements in Rs1, and then writes the result to Rd.
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned subtraction.
 *
 * **Operations**:\n
 * ~~~
 * Rd.B[x] = Rs1.B[x] - Rs2.B[x];
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SUB8(unsigned long a, unsigned long b)
{
    unsigned long diff; /* bound to output operand %0 of the instruction */

    /* %1 <- a (minuend elements), %2 <- b (subtrahend elements). */
    __ASM volatile("sub8 %0, %1, %2"
                   : "=r"(diff)
                   : "r"(a), "r"(b));
    return diff;
}
/* ===== Inline Function End for 3.136. SUB8 ===== */

/* ===== Inline Function Start for 3.137. SUB16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief SUB16 (SIMD 16-bit Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * SUB16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit integer element subtractions simultaneously.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit integer elements in Rs2 from the 16-bit integer
 * elements in Rs1, and then writes the result to Rd.
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned subtraction.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = Rs1.H[x] - Rs2.H[x];
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SUB16(unsigned long a, unsigned long b)
{
    unsigned long diff; /* bound to output operand %0 of the instruction */

    /* %1 <- a (minuend elements), %2 <- b (subtrahend elements). */
    __ASM volatile("sub16 %0, %1, %2"
                   : "=r"(diff)
                   : "r"(a), "r"(b));
    return diff;
}
/* ===== Inline Function End for 3.137. SUB16 ===== */

/* ===== Inline Function Start for 3.138. SUB64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
 * \brief SUB64 (64-bit Subtraction)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * SUB64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Perform a 64-bit signed or unsigned integer subtraction.
 *
 * **RV32 Description**:\n
 * This instruction subtracts the 64-bit integer of an even/odd pair of registers
 * specified by Rs2(4,1) from the 64-bit integer of an even/odd pair of registers specified by Rs1(4,1),
 * and then writes the 64-bit result to an even/odd pair of registers specified by Rd(4,1).
 * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
 * register of the pair contains the low 32-bit of the operand.
 *
 * **RV64 Description**:\n
 * This instruction subtracts the 64-bit integer of Rs2 from the 64-bit integer of Rs1,
 * and then writes the 64-bit result to Rd.
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned subtraction.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
 * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
 * R[t_H].R[t_L] = R[a_H].R[a_L] - R[b_H].R[b_L];
 * * RV64:
 * Rd = Rs1 - Rs2;
 * ~~~
 *
 * \param [in]  a    unsigned long long type of value stored in a
 * \param [in]  b    unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_SUB64(unsigned long long a, unsigned long long b)
{
    unsigned long long diff; /* bound to output operand %0 of the instruction */

    /* All three operands are 64-bit values: %1 <- a, %2 <- b. */
    __ASM volatile("sub64 %0, %1, %2"
                   : "=r"(diff)
                   : "r"(a), "r"(b));
    return diff;
}
/* ===== Inline Function End for 3.138. SUB64 ===== */

/* ===== Inline Function Start for 3.139.1. SUNPKD810 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
 * \brief SUNPKD810 (Signed Unpacking Bytes 1 & 0)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * SUNPKD8xy Rd, Rs1
 * xy = {10, 20, 30, 31, 32}
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte *x and byte y* of 32-bit chunks in a register into two 16-bit signed halfwords
 * of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `SUNPKD8(x)(*y*)` instruction, it unpacks byte *x and byte y* of 32-bit chunks in Rs1 into
 * two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit
 * chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = SE16(Rs1.W[m].B[x])
 * Rd.W[m].H[0] = SE16(Rs1.W[m].B[y])
 * // SUNPKD810, x=1,y=0
 * // SUNPKD820, x=2,y=0
 * // SUNPKD830, x=3,y=0
 * // SUNPKD831, x=3,y=1
 * // SUNPKD832, x=3,y=2
 * for RV32: m=0,
 * for RV64: m=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SUNPKD810(unsigned long a)
{
    unsigned long unpacked; /* bound to output operand %0 of the instruction */

    /* Single source operand: %1 <- a. */
    __ASM volatile("sunpkd810 %0, %1"
                   : "=r"(unpacked)
                   : "r"(a));
    return unpacked;
}
/* ===== Inline Function End for 3.139.1. SUNPKD810 ===== */

/* ===== Inline Function Start for 3.139.2. SUNPKD820 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
 * \brief SUNPKD820 (Signed Unpacking Bytes 2 & 0)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * SUNPKD8xy Rd, Rs1
 * xy = {10, 20, 30, 31, 32}
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte *x and byte y* of 32-bit chunks in a register into two 16-bit signed halfwords
 * of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `SUNPKD8(x)(*y*)` instruction, it unpacks byte *x and byte y* of 32-bit chunks in Rs1 into
 * two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit
 * chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = SE16(Rs1.W[m].B[x])
 * Rd.W[m].H[0] = SE16(Rs1.W[m].B[y])
 * // SUNPKD810, x=1,y=0
 * // SUNPKD820, x=2,y=0
 * // SUNPKD830, x=3,y=0
 * // SUNPKD831, x=3,y=1
 * // SUNPKD832, x=3,y=2
 * for RV32: m=0,
 * for RV64: m=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SUNPKD820(unsigned long a)
{
    unsigned long unpacked; /* bound to output operand %0 of the instruction */

    /* Single source operand: %1 <- a. */
    __ASM volatile("sunpkd820 %0, %1"
                   : "=r"(unpacked)
                   : "r"(a));
    return unpacked;
}
/* ===== Inline Function End for 3.139.2. SUNPKD820 ===== */

/* ===== Inline Function Start for 3.139.3. SUNPKD830 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
 * \brief SUNPKD830 (Signed Unpacking Bytes 3 & 0)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * SUNPKD8xy Rd, Rs1
 * xy = {10, 20, 30, 31, 32}
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte *x and byte y* of 32-bit chunks in a register into two 16-bit signed halfwords
 * of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `SUNPKD8(x)(*y*)` instruction, it unpacks byte *x and byte y* of 32-bit chunks in Rs1 into
 * two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit
 * chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = SE16(Rs1.W[m].B[x])
 * Rd.W[m].H[0] = SE16(Rs1.W[m].B[y])
 * // SUNPKD810, x=1,y=0
 * // SUNPKD820, x=2,y=0
 * // SUNPKD830, x=3,y=0
 * // SUNPKD831, x=3,y=1
 * // SUNPKD832, x=3,y=2
 * for RV32: m=0,
 * for RV64: m=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SUNPKD830(unsigned long a)
{
    unsigned long unpacked; /* bound to output operand %0 of the instruction */

    /* Single source operand: %1 <- a. */
    __ASM volatile("sunpkd830 %0, %1"
                   : "=r"(unpacked)
                   : "r"(a));
    return unpacked;
}
/* ===== Inline Function End for 3.139.3. SUNPKD830 ===== */

/* ===== Inline Function Start for 3.139.4. SUNPKD831 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
 * \brief SUNPKD831 (Signed Unpacking Bytes 3 & 1)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * SUNPKD8xy Rd, Rs1
 * xy = {10, 20, 30, 31, 32}
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte *x and byte y* of 32-bit chunks in a register into two 16-bit signed halfwords
 * of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `SUNPKD8(x)(*y*)` instruction, it unpacks byte *x and byte y* of 32-bit chunks in Rs1 into
 * two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit
 * chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = SE16(Rs1.W[m].B[x])
 * Rd.W[m].H[0] = SE16(Rs1.W[m].B[y])
 * // SUNPKD810, x=1,y=0
 * // SUNPKD820, x=2,y=0
 * // SUNPKD830, x=3,y=0
 * // SUNPKD831, x=3,y=1
 * // SUNPKD832, x=3,y=2
 * for RV32: m=0,
 * for RV64: m=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SUNPKD831(unsigned long a)
{
    unsigned long unpacked; /* bound to output operand %0 of the instruction */

    /* Single source operand: %1 <- a. */
    __ASM volatile("sunpkd831 %0, %1"
                   : "=r"(unpacked)
                   : "r"(a));
    return unpacked;
}
/* ===== Inline Function End for 3.139.4. SUNPKD831 ===== */

/* ===== Inline Function Start for 3.139.5. SUNPKD832 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
 * \brief SUNPKD832 (Signed Unpacking Bytes 3 & 2)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * SUNPKD8xy Rd, Rs1
 * xy = {10, 20, 30, 31, 32}
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte *x and byte y* of 32-bit chunks in a register into two 16-bit signed halfwords
 * of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `SUNPKD8(x)(*y*)` instruction, it unpacks byte *x and byte y* of 32-bit chunks in Rs1 into
 * two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit
 * chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = SE16(Rs1.W[m].B[x])
 * Rd.W[m].H[0] = SE16(Rs1.W[m].B[y])
 * // SUNPKD810, x=1,y=0
 * // SUNPKD820, x=2,y=0
 * // SUNPKD830, x=3,y=0
 * // SUNPKD831, x=3,y=1
 * // SUNPKD832, x=3,y=2
 * for RV32: m=0,
 * for RV64: m=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SUNPKD832(unsigned long a)
{
    unsigned long unpacked; /* bound to output operand %0 of the instruction */

    /* Single source operand: %1 <- a. */
    __ASM volatile("sunpkd832 %0, %1"
                   : "=r"(unpacked)
                   : "r"(a));
    return unpacked;
}
/* ===== Inline Function End for 3.139.5. SUNPKD832 ===== */

/* ===== Inline Function Start for 3.140. SWAP8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
 * \brief SWAP8 (Swap Byte within Halfword)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * SWAP8 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Swap the bytes within each halfword of a register.
 *
 * **Description**:\n
 * This instruction swaps the bytes within each halfword of Rs1 and writes the result to
 * Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = CONCAT(Rs1.H[x][7:0],Rs1.H[x][15:8]);
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SWAP8(unsigned long a)
{
    unsigned long swapped; /* bound to output operand %0 of the instruction */

    /* Single source operand: %1 <- a. */
    __ASM volatile("swap8 %0, %1"
                   : "=r"(swapped)
                   : "r"(a));
    return swapped;
}
/* ===== Inline Function End for 3.140. SWAP8 ===== */

/* ===== Inline Function Start for 3.141. SWAP16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
 * \brief SWAP16 (Swap Halfword within Word)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * SWAP16 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Swap the 16-bit halfwords within each word of a register.
 *
 * **Description**:\n
 * This instruction swaps the 16-bit halfwords within each word of Rs1 and writes the
 * result to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = CONCAT(Rs1.W[x][15:0],Rs1.W[x][31:16]);
 * for RV32: x=0,
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SWAP16(unsigned long a)
{
    unsigned long swapped; /* bound to output operand %0 of the instruction */

    /* Single source operand: %1 <- a. */
    __ASM volatile("swap16 %0, %1"
                   : "=r"(swapped)
                   : "r"(a));
    return swapped;
}
/* ===== Inline Function End for 3.141. SWAP16 ===== */

/* ===== Inline Function Start for 3.142. UCLIP8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
 * \brief UCLIP8 (SIMD 8-bit Unsigned Clip Value)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UCLIP8 Rd, Rs1, imm3u
 * ~~~
 *
 * **Purpose**:\n
 * Limit the 8-bit signed elements of a register into an unsigned range simultaneously.
 *
 * **Description**:\n
 * This instruction limits the 8-bit signed elements stored in Rs1 into an unsigned integer
 * range between 2^imm3u-1 and 0, and writes the limited results to Rd. For example, if imm3u is 3, the 8-
 * bit input values should be saturated between 7 and 0. If saturation is performed, set OV bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * src = Rs1.B[x];
 * if (src > (2^imm3u)-1) {
 *   src = (2^imm3u)-1;
 *   OV = 1;
 * } else if (src < 0) {
 *   src = 0;
 *   OV = 1;
 * }
 * Rd.B[x] = src;
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
#define __RV_UCLIP8(a, b)    \
    ({    \
        /* `a` is evaluated exactly once, and BEFORE the output temporary is    \
         * declared, so an argument expression that mentions a caller variable    \
         * called `result` can no longer bind to this macro's own local.    \
         * `b` is handed to the assembler through the "K" immediate constraint,    \
         * so it has to be a compile-time constant. */    \
        unsigned long __rv_uclip8_src = (unsigned long)(a);    \
        unsigned long __rv_uclip8_dst;    \
        __ASM volatile("uclip8 %0, %1, %2" : "=r"(__rv_uclip8_dst) : "r"(__rv_uclip8_src), "K"(b));    \
        /* Value of the statement expression: the clipped elements. */    \
        __rv_uclip8_dst;    \
    })
/* ===== Inline Function End for 3.142. UCLIP8 ===== */

/* ===== Inline Function Start for 3.143. UCLIP16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
 * \brief UCLIP16 (SIMD 16-bit Unsigned Clip Value)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UCLIP16 Rd, Rs1, imm4u
 * ~~~
 *
 * **Purpose**:\n
 * Limit the 16-bit signed elements of a register into an unsigned range simultaneously.
 *
 * **Description**:\n
 * This instruction limits the 16-bit signed elements stored in Rs1 into an unsigned
 * integer range between 2^imm4u-1 and 0, and writes the limited results to Rd. For example, if imm4u is
 * 3, the 16-bit input values should be saturated between 7 and 0. If saturation is performed, set OV bit
 * to 1.
 *
 * **Operations**:\n
 * ~~~
 * src = Rs1.H[x];
 * if (src > (2^imm4u)-1) {
 *   src = (2^imm4u)-1;
 *   OV = 1;
 * } else if (src < 0) {
 *   src = 0;
 *   OV = 1;
 * }
 * Rd.H[x] = src;
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
#define __RV_UCLIP16(a, b)    \
    ({    \
        /* `a` is evaluated exactly once, and BEFORE the output temporary is    \
         * declared, so an argument expression that mentions a caller variable    \
         * called `result` can no longer bind to this macro's own local.    \
         * `b` is handed to the assembler through the "K" immediate constraint,    \
         * so it has to be a compile-time constant. */    \
        unsigned long __rv_uclip16_src = (unsigned long)(a);    \
        unsigned long __rv_uclip16_dst;    \
        __ASM volatile("uclip16 %0, %1, %2" : "=r"(__rv_uclip16_dst) : "r"(__rv_uclip16_src), "K"(b));    \
        /* Value of the statement expression: the clipped elements. */    \
        __rv_uclip16_dst;    \
    })
/* ===== Inline Function End for 3.143. UCLIP16 ===== */

/* ===== Inline Function Start for 3.144. UCLIP32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
 * \brief UCLIP32 (SIMD 32-bit Unsigned Clip Value)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UCLIP32 Rd, Rs1, imm5u[4:0]
 * ~~~
 *
 * **Purpose**:\n
 * Limit the 32-bit signed integer elements of a register into an unsigned range
 * simultaneously.
 *
 * **Description**:\n
 * This instruction limits the 32-bit signed integer elements stored in Rs1 into an
 * unsigned integer range between 2^imm5u-1 and 0, and writes the limited results to Rd. For example, if
 * imm5u is 3, the 32-bit input values should be saturated between 7 and 0. If saturation is performed,
 * set OV bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * src = Rs1.W[x];
 * if (src > (2^imm5u)-1) {
 *   src = (2^imm5u)-1;
 *   OV = 1;
 * } else if (src < 0) {
 *   src = 0;
 *   OV = 1;
 * }
 * Rd.W[x] = src
 * for RV32: x=0,
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
#define __RV_UCLIP32(a, b)    \
    ({    \
        /* `a` is evaluated exactly once, and BEFORE the output temporary is    \
         * declared, so an argument expression that mentions a caller variable    \
         * called `result` can no longer bind to this macro's own local.    \
         * `b` is handed to the assembler through the "K" immediate constraint,    \
         * so it has to be a compile-time constant. */    \
        unsigned long __rv_uclip32_src = (unsigned long)(a);    \
        unsigned long __rv_uclip32_dst;    \
        __ASM volatile("uclip32 %0, %1, %2" : "=r"(__rv_uclip32_dst) : "r"(__rv_uclip32_src), "K"(b));    \
        /* Value of the statement expression: the clipped elements. */    \
        __rv_uclip32_dst;    \
    })
/* ===== Inline Function End for 3.144. UCLIP32 ===== */

/* ===== Inline Function Start for 3.145. UCMPLE8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP
 * \brief UCMPLE8 (SIMD 8-bit Unsigned Compare Less Than & Equal)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UCMPLE8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit unsigned integer elements less than & equal comparisons simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 8-bit unsigned integer elements in Rs1 with the 8-bit
 * unsigned integer elements in Rs2 to see if the one in Rs1 is less than or equal to the one in Rs2. If it
 * is true, the result is 0xFF; otherwise, the result is 0x0. The element comparison results are written to
 * Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.B[x] = (Rs1.B[x] <=u Rs2.B[x])? 0xff : 0x0;
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UCMPLE8(unsigned long a, unsigned long b)
{
    /* Per-byte comparison mask written by `ucmple8`. */
    unsigned long mask;

    __ASM volatile(
        "ucmple8 %0, %1, %2"
        : "=r"(mask)        /* %0: destination (Rd)  */
        : "r"(a),           /* %1: left operand  (Rs1) */
          "r"(b)            /* %2: right operand (Rs2) */
    );

    return mask;
}
/* ===== Inline Function End for 3.145. UCMPLE8 ===== */

/* ===== Inline Function Start for 3.146. UCMPLE16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP
 * \brief UCMPLE16 (SIMD 16-bit Unsigned Compare Less Than & Equal)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UCMPLE16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit unsigned integer elements less than & equal comparisons simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 16-bit unsigned integer elements in Rs1 with the 16-bit
 * unsigned integer elements in Rs2 to see if the one in Rs1 is less than or equal to the one in Rs2. If it
 * is true, the result is 0xFFFF; otherwise, the result is 0x0. The element comparison results are
 * written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = (Rs1.H[x] <=u Rs2.H[x])? 0xffff : 0x0;
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UCMPLE16(unsigned long a, unsigned long b)
{
    /* Per-halfword comparison mask written by `ucmple16`. */
    unsigned long mask;

    __ASM volatile(
        "ucmple16 %0, %1, %2"
        : "=r"(mask)        /* %0: destination (Rd)  */
        : "r"(a),           /* %1: left operand  (Rs1) */
          "r"(b)            /* %2: right operand (Rs2) */
    );

    return mask;
}
/* ===== Inline Function End for 3.146. UCMPLE16 ===== */

/* ===== Inline Function Start for 3.147. UCMPLT8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP
 * \brief UCMPLT8 (SIMD 8-bit Unsigned Compare Less Than)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UCMPLT8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit unsigned integer elements less than comparisons simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 8-bit unsigned integer elements in Rs1 with the 8-bit
 * unsigned integer elements in Rs2 to see if the one in Rs1 is less than the one in Rs2. If it is true, the
 * result is 0xFF; otherwise, the result is 0x0. The element comparison results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.B[x] = (Rs1.B[x] <u Rs2.B[x])? 0xff : 0x0;
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UCMPLT8(unsigned long a, unsigned long b)
{
    /* Per-byte comparison mask written by `ucmplt8`. */
    unsigned long mask;

    __ASM volatile(
        "ucmplt8 %0, %1, %2"
        : "=r"(mask)        /* %0: destination (Rd)  */
        : "r"(a),           /* %1: left operand  (Rs1) */
          "r"(b)            /* %2: right operand (Rs2) */
    );

    return mask;
}
/* ===== Inline Function End for 3.147. UCMPLT8 ===== */

/* ===== Inline Function Start for 3.148. UCMPLT16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP
 * \brief UCMPLT16 (SIMD 16-bit Unsigned Compare Less Than)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UCMPLT16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit unsigned integer elements less than comparisons simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 16-bit unsigned integer elements in Rs1 with the 16-bit
 * unsigned integer elements in Rs2 to see if the one in Rs1 is less than the one in Rs2. If it is true, the
 * result is 0xFFFF; otherwise, the result is 0x0. The element comparison results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = (Rs1.H[x] <u Rs2.H[x])? 0xffff : 0x0;
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UCMPLT16(unsigned long a, unsigned long b)
{
    /* Per-halfword comparison mask written by `ucmplt16`. */
    unsigned long mask;

    __ASM volatile(
        "ucmplt16 %0, %1, %2"
        : "=r"(mask)        /* %0: destination (Rd)  */
        : "r"(a),           /* %1: left operand  (Rs1) */
          "r"(b)            /* %2: right operand (Rs2) */
    );

    return mask;
}
/* ===== Inline Function End for 3.148. UCMPLT16 ===== */

/* ===== Inline Function Start for 3.149. UKADD8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
 * \brief UKADD8 (SIMD 8-bit Unsigned Saturating Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UKADD8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit unsigned integer element saturating additions simultaneously.
 *
 * **Description**:\n
 * This instruction adds the 8-bit unsigned integer elements in Rs1 with the 8-bit
 * unsigned integer elements in Rs2. If any of the results are beyond the 8-bit unsigned number range
 * (0 <= RES <= 2^8-1), they are saturated to the range and the OV bit is set to 1. The saturated results are
 * written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rs1.B[x] + Rs2.B[x];
 * if (res[x] > (2^8)-1) {
 *   res[x] = (2^8)-1;
 *   OV = 1;
 * }
 * Rd.B[x] = res[x];
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UKADD8(unsigned long a, unsigned long b)
{
    /* Packed 8-bit saturated sums written by `ukadd8`. */
    unsigned long sums;

    /* The instruction's documented OV side effect is not visible in the
     * operand list; the statement is `volatile` so it is always emitted. */
    __ASM volatile(
        "ukadd8 %0, %1, %2"
        : "=r"(sums)        /* %0: destination (Rd)  */
        : "r"(a),           /* %1: first addend  (Rs1) */
          "r"(b)            /* %2: second addend (Rs2) */
    );

    return sums;
}
/* ===== Inline Function End for 3.149. UKADD8 ===== */

/* ===== Inline Function Start for 3.150. UKADD16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief UKADD16 (SIMD 16-bit Unsigned Saturating Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UKADD16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit unsigned integer element saturating additions simultaneously.
 *
 * **Description**:\n
 * This instruction adds the 16-bit unsigned integer elements in Rs1 with the 16-bit
 * unsigned integer elements in Rs2. If any of the results are beyond the 16-bit unsigned number
 * range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set to 1. The saturated
 * results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rs1.H[x] + Rs2.H[x];
 * if (res[x] > (2^16)-1) {
 *   res[x] = (2^16)-1;
 *   OV = 1;
 * }
 * Rd.H[x] = res[x];
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UKADD16(unsigned long a, unsigned long b)
{
    /* Packed 16-bit saturated sums written by `ukadd16`. */
    unsigned long sums;

    /* The instruction's documented OV side effect is not visible in the
     * operand list; the statement is `volatile` so it is always emitted. */
    __ASM volatile(
        "ukadd16 %0, %1, %2"
        : "=r"(sums)        /* %0: destination (Rd)  */
        : "r"(a),           /* %1: first addend  (Rs1) */
          "r"(b)            /* %2: second addend (Rs2) */
    );

    return sums;
}
/* ===== Inline Function End for 3.150. UKADD16 ===== */

/* ===== Inline Function Start for 3.151. UKADD64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
 * \brief UKADD64 (64-bit Unsigned Saturating Addition)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * UKADD64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Add two 64-bit unsigned integers. The result is saturated to the U64 range.
 *
 * **RV32 Description**:\n
 * This instruction adds the 64-bit unsigned integer of an even/odd pair of registers
 * specified by Rs1(4,1) with the 64-bit unsigned integer of an even/odd pair of registers specified by
 * Rs2(4,1). If the 64-bit result is beyond the U64 number range (0 <= U64 <= 2^64-1), it is saturated to the
 * range and the OV bit is set to 1. The saturated result is written to an even/odd pair of registers
 * specified by Rd(4,1).
 * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
 * of the pair contains the low 32-bit of the result.
 *
 * **RV64 Description**:\n
 * This instruction adds the 64-bit unsigned integer in Rs1 with the 64-bit unsigned
 * integer in Rs2. If the 64-bit result is beyond the U64 number range (0 <= U64 <= 2^64-1), it is saturated to
 * the range and the OV bit is set to 1. The saturated result is written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
 * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
 * result = R[a_H].R[a_L] + R[b_H].R[b_L];
 * if (result > (2^64)-1) {
 *   result = (2^64)-1; OV = 1;
 * }
 * R[t_H].R[t_L] = result;
 * * RV64:
 * result = Rs1 + Rs2;
 * if (result > (2^64)-1) {
 *   result = (2^64)-1; OV = 1;
 * }
 * Rd = result;
 * ~~~
 *
 * \param [in]  a    unsigned long long type of value stored in a
 * \param [in]  b    unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_UKADD64(unsigned long long a, unsigned long long b)
{
    /* 64-bit saturated sum written by `ukadd64`. */
    unsigned long long sum64;

    /* All three operands are 64-bit values bound with plain "r" constraints;
     * how they map onto registers is left to the compiler. */
    __ASM volatile(
        "ukadd64 %0, %1, %2"
        : "=r"(sum64)       /* %0: destination (Rd)  */
        : "r"(a),           /* %1: first addend  (Rs1) */
          "r"(b)            /* %2: second addend (Rs2) */
    );

    return sum64;
}
/* ===== Inline Function End for 3.151. UKADD64 ===== */

/* ===== Inline Function Start for 3.152. UKADDH ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
 * \brief UKADDH (Unsigned Addition with U16 Saturation)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * UKADDH Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Add the unsigned lower 32-bit content of two registers with U16 saturation.
 *
 * **Description**:\n
 * The unsigned lower 32-bit content of Rs1 is added with the unsigned lower 32-bit
 * content of Rs2. And the result is saturated to the 16-bit unsigned integer range of [0, 2^16-1] and then
 * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag.
 *
 * **Operations**:\n
 * ~~~
 * tmp = Rs1.W[0] + Rs2.W[0];
 * if (tmp > (2^16)-1) {
 *   tmp = (2^16)-1;
 *   OV = 1;
 * }
 * Rd = SE(tmp[15:0]);
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UKADDH(unsigned int a, unsigned int b)
{
    /* Register-wide value written by `ukaddh`. */
    unsigned long sum;

    /* The instruction's documented OV side effect is not visible in the
     * operand list; the statement is `volatile` so it is always emitted. */
    __ASM volatile(
        "ukaddh %0, %1, %2"
        : "=r"(sum)         /* %0: destination (Rd)  */
        : "r"(a),           /* %1: first addend  (Rs1) */
          "r"(b)            /* %2: second addend (Rs2) */
    );

    return sum;
}
/* ===== Inline Function End for 3.152. UKADDH ===== */

/* ===== Inline Function Start for 3.153. UKADDW ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
 * \brief UKADDW (Unsigned Addition with U32 Saturation)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * UKADDW Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Add the unsigned lower 32-bit content of two registers with U32 saturation.
 *
 * **Description**:\n
 * The unsigned lower 32-bit content of Rs1 is added with the unsigned lower 32-bit
 * content of Rs2. And the result is saturated to the 32-bit unsigned integer range of [0, 2^32-1] and then
 * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag.
 *
 * **Operations**:\n
 * ~~~
 * tmp = Rs1.W[0] + Rs2.W[0];
 * if (tmp > (2^32)-1) {
 *   tmp[31:0] = (2^32)-1;
 *   OV = 1;
 * }
 * Rd = tmp[31:0]; // RV32
 * Rd = SE(tmp[31:0]); // RV64
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UKADDW(unsigned int a, unsigned int b)
{
    /* Register-wide value written by `ukaddw`. */
    unsigned long sum;

    /* The instruction's documented OV side effect is not visible in the
     * operand list; the statement is `volatile` so it is always emitted. */
    __ASM volatile(
        "ukaddw %0, %1, %2"
        : "=r"(sum)         /* %0: destination (Rd)  */
        : "r"(a),           /* %1: first addend  (Rs1) */
          "r"(b)            /* %2: second addend (Rs2) */
    );

    return sum;
}
/* ===== Inline Function End for 3.153. UKADDW ===== */

/* ===== Inline Function Start for 3.154. UKCRAS16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief UKCRAS16 (SIMD 16-bit Unsigned Saturating Cross Addition & Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UKCRAS16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do one 16-bit unsigned integer element saturating addition and one 16-bit unsigned
 * integer element saturating subtraction in a 32-bit chunk simultaneously. Operands are from crossed
 * positions in 32-bit chunks.
 *
 * **Description**:\n
 * This instruction adds the 16-bit unsigned integer element in [31:16] of 32-bit chunks in
 * Rs1 with the 16-bit unsigned integer element in [15:0] of 32-bit chunks in Rs2; at the same time, it
 * subtracts the 16-bit unsigned integer element in [31:16] of 32-bit chunks in Rs2 from the 16-bit
 * unsigned integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the 16-bit
 * unsigned number range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set to 1.
 * The saturated results are written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit
 * chunks in Rd for subtraction.
 *
 * **Operations**:\n
 * ~~~
 * res1 = Rs1.W[x][31:16] + Rs2.W[x][15:0];
 * res2 = Rs1.W[x][15:0] - Rs2.W[x][31:16];
 * if (res1 > (2^16)-1) {
 *   res1 = (2^16)-1;
 *   OV = 1;
 * }
 * if (res2 < 0) {
 *   res2 = 0;
 *   OV = 1;
 * }
 * Rd.W[x][31:16] = res1;
 * Rd.W[x][15:0] = res2;
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UKCRAS16(unsigned long a, unsigned long b)
{
    /* Packed add/subtract result written by `ukcras16`. */
    unsigned long packed;

    /* The instruction's documented OV side effect is not visible in the
     * operand list; the statement is `volatile` so it is always emitted. */
    __ASM volatile(
        "ukcras16 %0, %1, %2"
        : "=r"(packed)      /* %0: destination (Rd)  */
        : "r"(a),           /* %1: first source  (Rs1) */
          "r"(b)            /* %2: second source (Rs2) */
    );

    return packed;
}
/* ===== Inline Function End for 3.154. UKCRAS16 ===== */

/* ===== Inline Function Start for 3.155. UKCRSA16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief UKCRSA16 (SIMD 16-bit Unsigned Saturating Cross Subtraction & Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UKCRSA16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do one 16-bit unsigned integer element saturating subtraction and one 16-bit unsigned
 * integer element saturating addition in a 32-bit chunk simultaneously. Operands are from crossed
 * positions in 32-bit chunks.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit unsigned integer element in [15:0] of 32-bit
 * chunks in Rs2 from the 16-bit unsigned integer element in [31:16] of 32-bit chunks in Rs1; at the
 * same time, it adds the 16-bit unsigned integer element in [31:16] of 32-bit chunks in Rs2 with the 16-
 * bit unsigned integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the
 * 16-bit unsigned number range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set
 * to 1. The saturated results are written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of
 * 32-bit chunks in Rd for addition.
 *
 * **Operations**:\n
 * ~~~
 * res1 = Rs1.W[x][31:16] - Rs2.W[x][15:0];
 * res2 = Rs1.W[x][15:0] + Rs2.W[x][31:16];
 * if (res1 < 0) {
 *   res1 = 0;
 *   OV = 1;
 * }
 * if (res2 > (2^16)-1) {
 *   res2 = (2^16)-1;
 *   OV = 1;
 * }
 * Rd.W[x][31:16] = res1;
 * Rd.W[x][15:0] = res2;
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UKCRSA16(unsigned long a, unsigned long b)
{
    /* Packed subtract/add result written by `ukcrsa16`. */
    unsigned long packed;

    /* The instruction's documented OV side effect is not visible in the
     * operand list; the statement is `volatile` so it is always emitted. */
    __ASM volatile(
        "ukcrsa16 %0, %1, %2"
        : "=r"(packed)      /* %0: destination (Rd)  */
        : "r"(a),           /* %1: first source  (Rs1) */
          "r"(b)            /* %2: second source (Rs2) */
    );

    return packed;
}
/* ===== Inline Function End for 3.155. UKCRSA16 ===== */

/* ===== Inline Function Start for 3.156. UKMAR64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
 * \brief UKMAR64 (Unsigned Multiply and Saturating Add to 64-Bit Data)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * UKMAR64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the 32-bit unsigned elements in two registers and add the 64-bit multiplication
 * results to the 64-bit unsigned data of a pair of registers (RV32) or a register (RV64). The result is
 * saturated to the U64 range and written back to the pair of registers (RV32) or the register (RV64).
 *
 * **RV32 Description**:\n
 * This instruction multiplies the 32-bit unsigned data of Rs1 with that of Rs2. It
 * adds the 64-bit multiplication result to the 64-bit unsigned data of an even/odd pair of registers
 * specified by Rd(4,1) with unlimited precision. If the 64-bit addition result is beyond the U64 number
 * range (0 <= U64 <= 2^64-1), it is saturated to the range and the OV bit is set to 1. The saturated result is
 * written back to the even/odd pair of registers specified by Rd(4,1).
 * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
 * of the pair contains the low 32-bit of the result.
 *
 * **RV64 Description**:\n
 * This instruction multiplies the 32-bit unsigned elements of Rs1 with that of Rs2.
 * It adds the 64-bit multiplication results to the 64-bit unsigned data in Rd with unlimited precision. If
 * the 64-bit addition result is beyond the U64 number range (0 <= U64 <= 2^64-1), it is saturated to the
 * range and the OV bit is set to 1. The saturated result is written back to Rd.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * result = R[t_H].R[t_L] + (Rs1 u* Rs2);
 * if (result > (2^64)-1) {
 *   result = (2^64)-1; OV = 1;
 * }
 * R[t_H].R[t_L] = result;
 * * RV64:
 * // `result` has unlimited precision
 * result = Rd + (Rs1.W[0] u* Rs2.W[0]) + (Rs1.W[1] u* Rs2.W[1]);
 * if (result > (2^64)-1) {
 *   result = (2^64)-1; OV = 1;
 * }
 * Rd = result;
 * ~~~
 *
 * \param [in]  t    unsigned long long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_UKMAR64(unsigned long long t, unsigned long a, unsigned long b)
{
    /* Working copy of the accumulator: `ukmar64` both reads and rewrites its
     * destination operand, hence the "+r" (read-write) constraint below. */
    unsigned long long acc = t;

    __ASM volatile(
        "ukmar64 %0, %1, %2"
        : "+r"(acc)         /* %0: accumulator in/out (Rd) */
        : "r"(a),           /* %1: multiplicand (Rs1) */
          "r"(b)            /* %2: multiplier   (Rs2) */
    );

    return acc;
}
/* ===== Inline Function End for 3.156. UKMAR64 ===== */

/* ===== Inline Function Start for 3.157. UKMSR64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
 * \brief UKMSR64 (Unsigned Multiply and Saturating Subtract from 64-Bit Data)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * UKMSR64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the 32-bit unsigned elements in two registers and subtract the 64-bit
 * multiplication results from the 64-bit unsigned data of a pair of registers (RV32) or a register (RV64).
 * The result is saturated to the U64 range and written back to the pair of registers (RV32) or a register
 * (RV64).
 *
 * **RV32 Description**:\n
 * This instruction multiplies the 32-bit unsigned data of Rs1 with that of Rs2. It
 * subtracts the 64-bit multiplication result from the 64-bit unsigned data of an even/odd pair of
 * registers specified by Rd(4,1) with unlimited precision. If the 64-bit subtraction result is beyond the
 * U64 number range (0 <= U64 <= 2^64-1), it is saturated to the range and the OV bit is set to 1. The
 * saturated result is written back to the even/odd pair of registers specified by Rd(4,1).
 * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
 * of the pair contains the low 32-bit of the result.
 *
 * **RV64 Description**:\n
 * This instruction multiplies the 32-bit unsigned elements of Rs1 with that of Rs2.
 * It subtracts the 64-bit multiplication results from the 64-bit unsigned data of Rd with unlimited
 * precision. If the 64-bit subtraction result is beyond the U64 number range (0 <= U64 <= 2^64-1), it is
 * saturated to the range and the OV bit is set to 1. The saturated result is written back to Rd.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * result = R[t_H].R[t_L] - (Rs1 u* Rs2);
 * if (result < 0) {
 *   result = 0; OV = 1;
 * }
 * R[t_H].R[t_L] = result;
 * * RV64:
 * // `result` has unlimited precision
 * result = Rd - (Rs1.W[0] u* Rs2.W[0]) - (Rs1.W[1] u* Rs2.W[1]);
 * if (result < 0) {
 *   result = 0; OV = 1;
 * }
 * Rd = result;
 * ~~~
 *
 * \param [in]  t    unsigned long long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_UKMSR64(unsigned long long t, unsigned long a, unsigned long b)
{
    /* Working copy of the accumulator: `ukmsr64` both reads and rewrites its
     * destination operand, hence the "+r" (read-write) constraint below. */
    unsigned long long acc = t;

    __ASM volatile(
        "ukmsr64 %0, %1, %2"
        : "+r"(acc)         /* %0: accumulator in/out (Rd) */
        : "r"(a),           /* %1: multiplicand (Rs1) */
          "r"(b)            /* %2: multiplier   (Rs2) */
    );

    return acc;
}
/* ===== Inline Function End for 3.157. UKMSR64 ===== */

/* ===== Inline Function Start for 3.158. UKSTAS16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief UKSTAS16 (SIMD 16-bit Unsigned Saturating Straight Addition & Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UKSTAS16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do one 16-bit unsigned integer element saturating addition and one 16-bit unsigned
 * integer element saturating subtraction in a 32-bit chunk simultaneously. Operands are from
 * corresponding positions in 32-bit chunks.
 *
 * **Description**:\n
 * This instruction adds the 16-bit unsigned integer element in [31:16] of 32-bit chunks in
 * Rs1 with the 16-bit unsigned integer element in [31:16] of 32-bit chunks in Rs2; at the same time, it
 * subtracts the 16-bit unsigned integer element in [15:0] of 32-bit chunks in Rs2 from the 16-bit
 * unsigned integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the 16-bit
 * unsigned number range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set to 1.
 * The saturated results are written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit
 * chunks in Rd for subtraction.
 *
 * **Operations**:\n
 * ~~~
 * res1 = Rs1.W[x][31:16] + Rs2.W[x][31:16];
 * res2 = Rs1.W[x][15:0] - Rs2.W[x][15:0];
 * if (res1 > (2^16)-1) {
 *   res1 = (2^16)-1;
 *   OV = 1;
 * }
 * if (res2 < 0) {
 *   res2 = 0;
 *   OV = 1;
 * }
 * Rd.W[x][31:16] = res1;
 * Rd.W[x][15:0] = res2;
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UKSTAS16(unsigned long a, unsigned long b)
{
    /* Packed add/subtract result written by `ukstas16`. */
    unsigned long packed;

    /* The instruction's documented OV side effect is not visible in the
     * operand list; the statement is `volatile` so it is always emitted. */
    __ASM volatile(
        "ukstas16 %0, %1, %2"
        : "=r"(packed)      /* %0: destination (Rd)  */
        : "r"(a),           /* %1: first source  (Rs1) */
          "r"(b)            /* %2: second source (Rs2) */
    );

    return packed;
}
/* ===== Inline Function End for 3.158. UKSTAS16 ===== */

/* ===== Inline Function Start for 3.159. UKSTSA16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief UKSTSA16 (SIMD 16-bit Unsigned Saturating Straight Subtraction & Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UKSTSA16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do one 16-bit unsigned integer element saturating subtraction and one 16-bit unsigned
 * integer element saturating addition in a 32-bit chunk simultaneously. Operands are from
 * corresponding positions in 32-bit chunks.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit unsigned integer element in [31:16] of 32-bit
 * chunks in Rs2 from the 16-bit unsigned integer element in [31:16] of 32-bit chunks in Rs1; at the
 * same time, it adds the 16-bit unsigned integer element in [15:0] of 32-bit chunks in Rs2 with the 16-
 * bit unsigned integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the
 * 16-bit unsigned number range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set
 * to 1. The saturated results are written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of
 * 32-bit chunks in Rd for addition.
 *
 * **Operations**:\n
 * ~~~
 * res1 = Rs1.W[x][31:16] - Rs2.W[x][31:16];
 * res2 = Rs1.W[x][15:0] + Rs2.W[x][15:0];
 * if (res1 < 0) {
 *   res1 = 0;
 *   OV = 1;
 * }
 * if (res2 > (2^16)-1) {
 *   res2 = (2^16)-1;
 *   OV = 1;
 * }
 * Rd.W[x][31:16] = res1;
 * Rd.W[x][15:0] = res2;
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UKSTSA16(unsigned long a, unsigned long b)
{
    /* Packed subtract/add result written by `ukstsa16`. */
    unsigned long packed;

    /* The instruction's documented OV side effect is not visible in the
     * operand list; the statement is `volatile` so it is always emitted. */
    __ASM volatile(
        "ukstsa16 %0, %1, %2"
        : "=r"(packed)      /* %0: destination (Rd)  */
        : "r"(a),           /* %1: first source  (Rs1) */
          "r"(b)            /* %2: second source (Rs2) */
    );

    return packed;
}
/* ===== Inline Function End for 3.159. UKSTSA16 ===== */

/* ===== Inline Function Start for 3.160. UKSUB8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
 * \brief UKSUB8 (SIMD 8-bit Unsigned Saturating Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UKSUB8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit unsigned integer elements saturating subtractions simultaneously.
 *
 * **Description**:\n
 * This instruction subtracts the 8-bit unsigned integer elements in Rs2 from the 8-bit
 * unsigned integer elements in Rs1. If any of the results are beyond the 8-bit unsigned number range
 * (0 <= RES <= 2^8-1), they are saturated to the range and the OV bit is set to 1. The saturated results are
 * written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rs1.B[x] - Rs2.B[x];
 * if (res[x] < 0) {
 *   res[x] = 0;
 *   OV = 1;
 * }
 * Rd.B[x] = res[x];
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UKSUB8(unsigned long a, unsigned long b)
{
    /* Packed 8-bit saturated differences written by `uksub8`. */
    unsigned long diffs;

    /* The instruction's documented OV side effect is not visible in the
     * operand list; the statement is `volatile` so it is always emitted. */
    __ASM volatile(
        "uksub8 %0, %1, %2"
        : "=r"(diffs)       /* %0: destination (Rd)  */
        : "r"(a),           /* %1: minuend    (Rs1) */
          "r"(b)            /* %2: subtrahend (Rs2) */
    );

    return diffs;
}
/* ===== Inline Function End for 3.160. UKSUB8 ===== */

/* ===== Inline Function Start for 3.161. UKSUB16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief UKSUB16 (SIMD 16-bit Unsigned Saturating Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UKSUB16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit unsigned integer elements saturating subtractions simultaneously.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit unsigned integer elements in Rs2 from the 16-bit
 * unsigned integer elements in Rs1. If any of the results are beyond the 16-bit unsigned number
 * range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set to 1. The saturated
 * results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rs1.H[x] - Rs2.H[x];
 * if (res[x] < 0) {
 *   res[x] = 0;
 *   OV = 1;
 * }
 * Rd.H[x] = res[x];
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UKSUB16(unsigned long a, unsigned long b)
{
    /* Packed 16-bit saturated differences written by `uksub16`. */
    unsigned long diffs;

    /* The instruction's documented OV side effect is not visible in the
     * operand list; the statement is `volatile` so it is always emitted. */
    __ASM volatile(
        "uksub16 %0, %1, %2"
        : "=r"(diffs)       /* %0: destination (Rd)  */
        : "r"(a),           /* %1: minuend    (Rs1) */
          "r"(b)            /* %2: subtrahend (Rs2) */
    );

    return diffs;
}
/* ===== Inline Function End for 3.161. UKSUB16 ===== */

/* ===== Inline Function Start for 3.162. UKSUB64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
 * \brief UKSUB64 (64-bit Unsigned Saturating Subtraction)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * UKSUB64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Perform a 64-bit unsigned integer subtraction. The result is saturated to the U64 range.
 *
 * **RV32 Description**:\n
 * This instruction subtracts the 64-bit unsigned integer of an even/odd pair of
 * registers specified by Rs2(4,1) from the 64-bit unsigned integer of an even/odd pair of registers
 * specified by Rs1(4,1). If the 64-bit result is beyond the U64 number range (0 <= U64 <= 2^64-1), it is
 * saturated to the range and the OV bit is set to 1. The saturated result is then written to an even/odd
 * pair of registers specified by Rd(4,1).
 * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
 * register of the pair contains the low 32-bit of the operand.
 *
 * **RV64 Description**:\n
 * This instruction subtracts the 64-bit unsigned integer of Rs2 from the 64-bit
 * unsigned integer of Rs1. If the 64-bit result is beyond the U64 number range (0 <=
 * U64 <= 2^64-1), it is saturated to the range and the OV bit is set to 1. The saturated result is then written
 * to Rd.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
 * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
 * result = R[a_H].R[a_L] - R[b_H].R[b_L];
 * if (result < 0) {
 *   result = 0; OV = 1;
 * }
 * R[t_H].R[t_L] = result;
 * * RV64
 * result = Rs1 - Rs2;
 * if (result < 0) {
 *   result = 0; OV = 1;
 * }
 * Rd = result;
 * ~~~
 *
 * \param [in]  a    unsigned long long type of value stored in a
 * \param [in]  b    unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_UKSUB64(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;  /* 64-bit destination operand (%0) */

    /* %1 = a, %2 = b, both 64-bit values handed over as register operands.
     * Per the Operations above, a result below 0 is replaced by 0. */
    __ASM volatile("uksub64 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.162. UKSUB64 ===== */

/* ===== Inline Function Start for 3.163. UKSUBH ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
 * \brief UKSUBH (Unsigned Subtraction with U16 Saturation)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * UKSUBH Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Subtract the unsigned lower 32-bit content of two registers with U16 saturation.
 *
 * **Description**:\n
 * The unsigned lower 32-bit content of Rs2 is subtracted from the unsigned lower 32-bit
 * content of Rs1. And the result is saturated to the 16-bit unsigned integer range of [0, 2^16-1] and then
 * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag.
 *
 * **Operations**:\n
 * ~~~
 * tmp = Rs1.W[0] - Rs2.W[0];
 * if (tmp > (2^16)-1) {
 *   tmp = (2^16)-1;
 *   OV = 1;
 * }
 * else if (tmp < 0) {
 *   tmp = 0;
 *   OV = 1;
 * }
 * Rd = SE(tmp[15:0]);
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UKSUBH(unsigned int a, unsigned int b)
{
    unsigned long rd;   /* full-width destination operand (%0) */

    /* %1 = a, %2 = b are passed as unsigned int; the instruction's result
     * is returned in an unsigned long. */
    __ASM volatile("uksubh %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.163. UKSUBH ===== */

/* ===== Inline Function Start for 3.164. UKSUBW ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
 * \brief UKSUBW (Unsigned Subtraction with U32 Saturation)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * UKSUBW Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Subtract the unsigned lower 32-bit content of two registers with unsigned 32-bit
 * saturation.
 *
 * **Description**:\n
 * The unsigned lower 32-bit content of Rs2 is subtracted from the unsigned lower 32-bit
 * content of Rs1. And the result is saturated to the 32-bit unsigned integer range of [0, 2^32-1] and then
 * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag.
 *
 * **Operations**:\n
 * ~~~
 * tmp = Rs1.W[0] - Rs2.W[0];
 * if (tmp < 0) {
 *   tmp[31:0] = 0;
 *   OV = 1;
 * }
 * Rd = tmp[31:0]; // RV32
 * Rd = SE(tmp[31:0]); // RV64
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UKSUBW(unsigned int a, unsigned int b)
{
    unsigned long rd;   /* full-width destination operand (%0) */

    /* %1 = a, %2 = b are passed as unsigned int; the instruction's result
     * is returned in an unsigned long. */
    __ASM volatile("uksubw %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.164. UKSUBW ===== */

/* ===== Inline Function Start for 3.165. UMAR64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
 * \brief UMAR64 (Unsigned Multiply and Add to 64-Bit Data)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * UMAR64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the 32-bit unsigned elements in two registers and add the 64-bit multiplication
 * results to the 64-bit unsigned data of a pair of registers (RV32) or a register (RV64). The result is
 * written back to the pair of registers (RV32) or a register (RV64).
 *
 * **RV32 Description**:\n
 * This instruction multiplies the 32-bit unsigned data of Rs1 with that of Rs2. It
 * adds the 64-bit multiplication result to the 64-bit unsigned data of an even/odd pair of registers
 * specified by Rd(4,1). The addition result is written back to the even/odd pair of registers specified by
 * Rd(4,1).
 * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
 * of the pair contains the low 32-bit of the result.
 *
 * **RV64 Description**:\n
 * This instruction multiplies the 32-bit unsigned elements of Rs1 with that of Rs2.
 * It adds the 64-bit multiplication results to the 64-bit unsigned data of Rd. The addition result is
 * written back to Rd.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * R[t_H].R[t_L] = R[t_H].R[t_L] + (Rs1 * Rs2);
 * * RV64:
 * Rd = Rd + (Rs1.W[0] u* Rs2.W[0]) + (Rs1.W[1] u* Rs2.W[1]);
 * ~~~
 *
 * \param [in]  t    unsigned long long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_UMAR64(unsigned long long t, unsigned long a, unsigned long b)
{
    /* Working copy of the 64-bit accumulator supplied by the caller. */
    unsigned long long acc = t;

    /* %0 is a read-write operand: it carries the accumulator in and the
     * updated value out. %1 = a and %2 = b are the multiplicands. */
    __ASM volatile("umar64 %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));

    return acc;
}
/* ===== Inline Function End for 3.165. UMAR64 ===== */

/* ===== Inline Function Start for 3.166. UMAQA ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_8B_MULT_32B_ADD
 * \brief UMAQA (Unsigned Multiply Four Bytes with 32-bit Adds)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * UMAQA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do four unsigned 8-bit multiplications from 32-bit chunks of two registers; and then adds
 * the four 16-bit results and the content of corresponding 32-bit chunks of a third register together.
 *
 * **Description**:\n
 * This instruction multiplies the four unsigned 8-bit elements of 32-bit chunks of Rs1 with the four
 * unsigned 8-bit elements of 32-bit chunks of Rs2 and then adds the four results together with the
 * unsigned content of the corresponding 32-bit chunks of Rd. The final results are written back to the
 * corresponding 32-bit chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rd.W[x] + (Rs1.W[x].B[3] u* Rs2.W[x].B[3]) +
 *          (Rs1.W[x].B[2] u* Rs2.W[x].B[2]) + (Rs1.W[x].B[1] u* Rs2.W[x].B[1]) +
 *          (Rs1.W[x].B[0] u* Rs2.W[x].B[0]);
 * Rd.W[x] = res[x];
 * for RV32: x=0,
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  t    unsigned long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UMAQA(unsigned long t, unsigned long a, unsigned long b)
{
    /* Working copy of the accumulator supplied by the caller. */
    unsigned long acc = t;

    /* %0 is a read-write operand: it carries the accumulator in and the
     * updated sum out. %1 = a and %2 = b hold the packed byte operands. */
    __ASM volatile("umaqa %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));

    return acc;
}
/* ===== Inline Function End for 3.166. UMAQA ===== */

/* ===== Inline Function Start for 3.167. UMAX8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
 * \brief UMAX8 (SIMD 8-bit Unsigned Maximum)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UMAX8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit unsigned integer elements finding maximum operations simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 8-bit unsigned integer elements in Rs1 with the 8-bit
 * unsigned integer elements in Rs2 and selects the greater element of each pair. The
 * selected results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.B[x] = (Rs1.B[x] >u Rs2.B[x])? Rs1.B[x] : Rs2.B[x];
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UMAX8(unsigned long a, unsigned long b)
{
    unsigned long rd;   /* bound to the destination operand (%0) */

    /* %1 = a (Rs1), %2 = b (Rs2): per-byte unsigned maximum, as given in
     * the Operations above. */
    __ASM volatile("umax8 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.167. UMAX8 ===== */

/* ===== Inline Function Start for 3.168. UMAX16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
 * \brief UMAX16 (SIMD 16-bit Unsigned Maximum)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UMAX16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit unsigned integer elements finding maximum operations simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 16-bit unsigned integer elements in Rs1 with the 16-bit
 * unsigned integer elements in Rs2 and selects the greater element of each pair. The
 * selected results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = (Rs1.H[x] >u Rs2.H[x])? Rs1.H[x] : Rs2.H[x];
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UMAX16(unsigned long a, unsigned long b)
{
    unsigned long rd;   /* bound to the destination operand (%0) */

    /* %1 = a (Rs1), %2 = b (Rs2): per-halfword unsigned maximum, as given
     * in the Operations above. */
    __ASM volatile("umax16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.168. UMAX16 ===== */

/* ===== Inline Function Start for 3.169. UMIN8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
 * \brief UMIN8 (SIMD 8-bit Unsigned Minimum)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UMIN8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit unsigned integer elements finding minimum operations simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 8-bit unsigned integer elements in Rs1 with the 8-bit
 * unsigned integer elements in Rs2 and selects the lesser element of each pair. The
 * selected results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.B[x] = (Rs1.B[x] <u Rs2.B[x])? Rs1.B[x] : Rs2.B[x];
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UMIN8(unsigned long a, unsigned long b)
{
    unsigned long rd;   /* bound to the destination operand (%0) */

    /* %1 = a (Rs1), %2 = b (Rs2): per-byte unsigned minimum, as given in
     * the Operations above. */
    __ASM volatile("umin8 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.169. UMIN8 ===== */

/* ===== Inline Function Start for 3.170. UMIN16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
 * \brief UMIN16 (SIMD 16-bit Unsigned Minimum)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UMIN16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit unsigned integer elements finding minimum operations simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 16-bit unsigned integer elements in Rs1 with the 16-bit
 * unsigned integer elements in Rs2 and selects the lesser element of each pair. The
 * selected results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = (Rs1.H[x] <u Rs2.H[x])? Rs1.H[x] : Rs2.H[x];
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UMIN16(unsigned long a, unsigned long b)
{
    unsigned long rd;   /* bound to the destination operand (%0) */

    /* %1 = a (Rs1), %2 = b (Rs2): per-halfword unsigned minimum, as given
     * in the Operations above. */
    __ASM volatile("umin16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.170. UMIN16 ===== */

/* ===== Inline Function Start for 3.171. UMSR64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
 * \brief UMSR64 (Unsigned Multiply and Subtract from 64-Bit Data)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * UMSR64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the 32-bit unsigned elements in two registers and subtract the 64-bit
 * multiplication results from the 64-bit unsigned data of a pair of registers (RV32) or a register (RV64).
 * The result is written back to the pair of registers (RV32) or a register (RV64).
 *
 * **RV32 Description**:\n
 * This instruction multiplies the 32-bit unsigned data of Rs1 with that of Rs2. It
 * subtracts the 64-bit multiplication result from the 64-bit unsigned data of an even/odd pair of
 * registers specified by Rd(4,1). The subtraction result is written back to the even/odd pair of registers
 * specified by Rd(4,1).
 * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
 * of the pair contains the low 32-bit of the result.
 *
 * **RV64 Description**:\n
 * This instruction multiplies the 32-bit unsigned elements of Rs1 with that of Rs2.
 * It subtracts the 64-bit multiplication results from the 64-bit unsigned data of Rd. The subtraction
 * result is written back to Rd.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * R[t_H].R[t_L] = R[t_H].R[t_L] - (Rs1 * Rs2);
 * * RV64:
 * Rd = Rd - (Rs1.W[0] u* Rs2.W[0]) - (Rs1.W[1] u* Rs2.W[1]);
 * ~~~
 *
 * \param [in]  t    unsigned long long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_UMSR64(unsigned long long t, unsigned long a, unsigned long b)
{
    /* Working copy of the 64-bit value the product is subtracted from. */
    unsigned long long acc = t;

    /* %0 is a read-write operand: it carries the minuend in and the
     * updated value out. %1 = a and %2 = b are the multiplicands. */
    __ASM volatile("umsr64 %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));

    return acc;
}
/* ===== Inline Function End for 3.171. UMSR64 ===== */

/* ===== Inline Function Start for 3.172.1. UMUL8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY
 * \brief UMUL8 (SIMD Unsigned 8-bit Multiply)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UMUL8 Rd, Rs1, Rs2
 * UMULX8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do unsigned 8-bit multiplications and generate four 16-bit results simultaneously.
 *
 * **RV32 Description**:\n
 * For the `UMUL8` instruction, multiply the unsigned 8-bit data elements of Rs1
 * with the corresponding unsigned 8-bit data elements of Rs2.
 * For the `UMULX8` instruction, multiply the first and second unsigned 8-bit data elements of Rs1
 * with the second and first unsigned 8-bit data elements of Rs2. At the same time, multiply the third
 * and fourth unsigned 8-bit data elements of Rs1 with the fourth and third unsigned 8-bit data
 * elements of Rs2.
 * The four 16-bit results are then written into an even/odd pair of registers specified by Rd(4,1).
 * Rd(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the two 16-bit results calculated from the top part of
 * Rs1 and the even `2d` register of the pair contains the two 16-bit results calculated from the bottom
 * part of Rs1.
 *
 * **RV64 Description**:\n
 * For the `UMUL8` instruction, multiply the unsigned 8-bit data elements of Rs1
 * with the corresponding unsigned 8-bit data elements of Rs2.
 * For the `UMULX8` instruction, multiply the first and second unsigned 8-bit data elements of Rs1
 * with the second and first unsigned 8-bit data elements of Rs2. At the same time, multiply the third
 * and fourth unsigned 8-bit data elements of Rs1 with the fourth and third unsigned 8-bit data
 * elements of Rs2.
 * The four 16-bit results are then written into Rd. The Rd.W[1] contains the two 16-bit results
 * calculated from the top part of Rs1 and the Rd.W[0] contains the two 16-bit results calculated from
 * the bottom part of Rs1.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * if (is `UMUL8`) {
 *   op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
 *   op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
 * } else if (is `UMULX8`) {
 *   op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
 *   op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
 * }
 * rest[x/2] = op1t[x/2] u* op2t[x/2];
 * resb[x/2] = op1b[x/2] u* op2b[x/2];
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * R[t_H].H[1] = rest[1]; R[t_H].H[0] = resb[1];
 * R[t_L].H[1] = rest[0]; R[t_L].H[0] = resb[0];
 * x = 0 and 2
 * * RV64:
 * if (is `UMUL8`) {
 *     op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
 *     op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
 * } else if (is `UMULX8`) {
 *     op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
 *     op1b[x/2]  =  Rs1.B[x]; op2b[x/2]  =  Rs2.B[x+1];  //  Rs1  bottom
 * }
 * rest[x/2]  =  op1t[x/2]  u*  op2t[x/2];
 * resb[x/2]  =  op1b[x/2]  u*  op2b[x/2];
 * t_L  =  CONCAT(Rd(4,1),1'b0); t_H  =  CONCAT(Rd(4,1),1'b1);
 * Rd.W[1].H[1] = rest[1]; Rd.W[1].H[0] = resb[1];
 * Rd.W[0].H[1] = rest[0]; Rd.W[0].H[0] = resb[0]; x = 0 and 2
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_UMUL8(unsigned int a, unsigned int b)
{
    unsigned long long rd;  /* 64-bit destination holding the packed 16-bit products */

    /* %1 = a, %2 = b: 32-bit operands whose corresponding bytes are
     * multiplied (non-crossed form). */
    __ASM volatile("umul8 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.172.1. UMUL8 ===== */

/* ===== Inline Function Start for 3.172.2. UMULX8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY
 * \brief UMULX8 (SIMD Unsigned Crossed 8-bit Multiply)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UMUL8 Rd, Rs1, Rs2
 * UMULX8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do unsigned 8-bit multiplications and generate four 16-bit results simultaneously.
 *
 * **RV32 Description**:\n
 * For the `UMUL8` instruction, multiply the unsigned 8-bit data elements of Rs1
 * with the corresponding unsigned 8-bit data elements of Rs2.
 * For the `UMULX8` instruction, multiply the first and second unsigned 8-bit data elements of Rs1
 * with the second and first unsigned 8-bit data elements of Rs2. At the same time, multiply the third
 * and fourth unsigned 8-bit data elements of Rs1 with the fourth and third unsigned 8-bit data
 * elements of Rs2.
 * The four 16-bit results are then written into an even/odd pair of registers specified by Rd(4,1).
 * Rd(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the two 16-bit results calculated from the top part of
 * Rs1 and the even `2d` register of the pair contains the two 16-bit results calculated from the bottom
 * part of Rs1.
 *
 * **RV64 Description**:\n
 * For the `UMUL8` instruction, multiply the unsigned 8-bit data elements of Rs1
 * with the corresponding unsigned 8-bit data elements of Rs2.
 * For the `UMULX8` instruction, multiply the first and second unsigned 8-bit data elements of Rs1
 * with the second and first unsigned 8-bit data elements of Rs2. At the same time, multiply the third
 * and fourth unsigned 8-bit data elements of Rs1 with the fourth and third unsigned 8-bit data
 * elements of Rs2.
 * The four 16-bit results are then written into Rd. The Rd.W[1] contains the two 16-bit results
 * calculated from the top part of Rs1 and the Rd.W[0] contains the two 16-bit results calculated from
 * the bottom part of Rs1.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * if (is `UMUL8`) {
 *   op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
 *   op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
 * } else if (is `UMULX8`) {
 *   op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
 *   op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
 * }
 * rest[x/2] = op1t[x/2] u* op2t[x/2];
 * resb[x/2] = op1b[x/2] u* op2b[x/2];
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * R[t_H].H[1] = rest[1]; R[t_H].H[0] = resb[1];
 * R[t_L].H[1] = rest[0]; R[t_L].H[0] = resb[0];
 * x = 0 and 2
 * * RV64:
 * if (is `UMUL8`) {
 *     op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
 *     op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
 * } else if (is `UMULX8`) {
 *     op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
 *     op1b[x/2]  =  Rs1.B[x]; op2b[x/2]  =  Rs2.B[x+1];  //  Rs1  bottom
 * }
 * rest[x/2]  =  op1t[x/2]  u*  op2t[x/2];
 * resb[x/2]  =  op1b[x/2]  u*  op2b[x/2];
 * t_L  =  CONCAT(Rd(4,1),1'b0); t_H  =  CONCAT(Rd(4,1),1'b1);
 * Rd.W[1].H[1] = rest[1]; Rd.W[1].H[0] = resb[1];
 * Rd.W[0].H[1] = rest[0]; Rd.W[0].H[0] = resb[0]; x = 0 and 2
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_UMULX8(unsigned int a, unsigned int b)
{
    unsigned long long rd;  /* 64-bit destination holding the packed 16-bit products */

    /* %1 = a, %2 = b: 32-bit operands; bytes of a are multiplied with the
     * neighbouring (crossed) bytes of b, per the Operations above. */
    __ASM volatile("umulx8 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.172.2. UMULX8 ===== */

/* ===== Inline Function Start for 3.173.1. UMUL16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY
 * \brief UMUL16 (SIMD Unsigned 16-bit Multiply)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UMUL16 Rd, Rs1, Rs2
 * UMULX16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do unsigned 16-bit multiplications and generate two 32-bit results simultaneously.
 *
 * **RV32 Description**:\n
 * For the `UMUL16` instruction, multiply the top 16-bit U16 content of Rs1 with
 * the top 16-bit U16 content of Rs2. At the same time, multiply the bottom 16-bit U16 content of Rs1
 * with the bottom 16-bit U16 content of Rs2.
 * For the `UMULX16` instruction, multiply the top 16-bit U16 content of Rs1 with the bottom 16-bit
 * U16 content of Rs2. At the same time, multiply the bottom 16-bit U16 content of Rs1 with the top 16-
 * bit U16 content of Rs2.
 * The two U32 results are then written into an even/odd pair of registers specified by Rd(4,1). Rd(4,1),
 * i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair includes
 * register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the 32-bit result calculated from the top part of Rs1 and
 * the even `2d` register of the pair contains the 32-bit result calculated from the bottom part of Rs1.
 *
 * **RV64 Description**:\n
 * For the `UMUL16` instruction, multiply the top 16-bit U16 content of the lower
 * 32-bit word in Rs1 with the top 16-bit U16 content of the lower 32-bit word in Rs2. At the same time,
 * multiply the bottom 16-bit U16 content of the lower 32-bit word in Rs1 with the bottom 16-bit U16
 * content of the lower 32-bit word in Rs2.
 * For the `UMULX16` instruction, multiply the top 16-bit U16 content of the lower 32-bit word in Rs1
 * with the bottom 16-bit U16 content of the lower 32-bit word in Rs2. At the same time, multiply the
 * bottom 16-bit U16 content of the lower 32-bit word in Rs1 with the top 16-bit U16 content of the
 * lower 32-bit word in Rs2.
 * The two 32-bit U32 results are then written into Rd. The result calculated from the top 16-bit of the
 * lower 32-bit word in Rs1 is written to Rd.W[1]. And the result calculated from the bottom 16-bit of
 * the lower 32-bit word in Rs1 is written to Rd.W[0].
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * if (is `UMUL16`) {
 *   op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
 *   op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
 * } else if (is `UMULX16`) {
 *   op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
 *   op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
 * }
 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
 *   res = aop u* bop;
 * }
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * R[t_H] = rest;
 * R[t_L] = resb;
 * * RV64:
 * if (is `UMUL16`) {
 *   op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
 *   op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
 * } else if (is `UMULX16`) {
 *   op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
 *   op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
 * }
 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
 *   res = aop u* bop;
 * }
 * Rd.W[1] = rest;
 * Rd.W[0] = resb;
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_UMUL16(unsigned int a, unsigned int b)
{
    unsigned long long rd;  /* 64-bit destination holding the two 32-bit products */

    /* %1 = a, %2 = b: 32-bit operands; top*top and bottom*bottom halfword
     * products (non-crossed form), per the Operations above. */
    __ASM volatile("umul16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.173.1. UMUL16 ===== */

/* ===== Inline Function Start for 3.173.2. UMULX16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY
 * \brief UMULX16 (SIMD Unsigned Crossed 16-bit Multiply)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * UMUL16 Rd, Rs1, Rs2
 * UMULX16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do unsigned 16-bit multiplications and generate two 32-bit results simultaneously.
 *
 * **RV32 Description**:\n
 * For the `UMUL16` instruction, multiply the top 16-bit U16 content of Rs1 with
 * the top 16-bit U16 content of Rs2. At the same time, multiply the bottom 16-bit U16 content of Rs1
 * with the bottom 16-bit U16 content of Rs2.
 * For the `UMULX16` instruction, multiply the top 16-bit U16 content of Rs1 with the bottom 16-bit
 * U16 content of Rs2. At the same time, multiply the bottom 16-bit U16 content of Rs1 with the top 16-
 * bit U16 content of Rs2.
 * The two U32 results are then written into an even/odd pair of registers specified by Rd(4,1). Rd(4,1),
 * i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair includes
 * register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the 32-bit result calculated from the top part of Rs1 and
 * the even `2d` register of the pair contains the 32-bit result calculated from the bottom part of Rs1.
 *
 * **RV64 Description**:\n
 * For the `UMUL16` instruction, multiply the top 16-bit U16 content of the lower
 * 32-bit word in Rs1 with the top 16-bit U16 content of the lower 32-bit word in Rs2. At the same time,
 * multiply the bottom 16-bit U16 content of the lower 32-bit word in Rs1 with the bottom 16-bit U16
 * content of the lower 32-bit word in Rs2.
 * For the `UMULX16` instruction, multiply the top 16-bit U16 content of the lower 32-bit word in Rs1
 * with the bottom 16-bit U16 content of the lower 32-bit word in Rs2. At the same time, multiply the
 * bottom 16-bit U16 content of the lower 32-bit word in Rs1 with the top 16-bit U16 content of the
 * lower 32-bit word in Rs2.
 * The two 32-bit U32 results are then written into Rd. The result calculated from the top 16-bit of the
 * lower 32-bit word in Rs1 is written to Rd.W[1]. And the result calculated from the bottom 16-bit of
 * the lower 32-bit word in Rs1 is written to Rd.W[0].
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * if (is `UMUL16`) {
 *   op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
 *   op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
 * } else if (is `UMULX16`) {
 *   op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
 *   op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
 * }
 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
 *   res = aop u* bop;
 * }
 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * R[t_H] = rest;
 * R[t_L] = resb;
 * * RV64:
 * if (is `UMUL16`) {
 *   op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
 *   op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
 * } else if (is `UMULX16`) {
 *   op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
 *   op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
 * }
 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
 *   res = aop u* bop;
 * }
 * Rd.W[1] = rest;
 * Rd.W[0] = resb;
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_UMULX16(unsigned int a, unsigned int b)
{
    unsigned long long rd;  /* 64-bit destination holding the two 32-bit products */

    /* %1 = a, %2 = b: 32-bit operands; top*bottom and bottom*top halfword
     * products (crossed form), per the Operations above. */
    __ASM volatile("umulx16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.173.2. UMULX16 ===== */

/* ===== Inline Function Start for 3.174. URADD8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
 * \brief URADD8 (SIMD 8-bit Unsigned Halving Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * URADD8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit unsigned integer element additions simultaneously. The results are halved to
 * avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the 8-bit unsigned integer elements in Rs1 with the 8-bit
 * unsigned integer elements in Rs2. The results are first logically right-shifted by 1 bit and then
 * written to Rd.
 *
 * **Examples**:\n
 * ~~~
 * * Ra = 0x7F, Rb = 0x7F, Rt = 0x7F
 * * Ra = 0x80, Rb = 0x80, Rt = 0x80
 * * Ra = 0x40, Rb = 0x80, Rt = 0x60
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.B[x] = (Rs1.B[x] + Rs2.B[x]) u>> 1;
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_URADD8(unsigned long a, unsigned long b)
{
    unsigned long rd;   /* bound to the destination operand (%0) */

    /* %1 = a (Rs1), %2 = b (Rs2): per-byte unsigned add followed by a
     * logical right shift by 1, per the Operations above. */
    __ASM volatile("uradd8 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 3.174. URADD8 ===== */

/* ===== Inline Function Start for 3.175. URADD16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief URADD16 (SIMD 16-bit Unsigned Halving Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * URADD16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit unsigned integer element additions simultaneously. The results are halved to
 * avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the 16-bit unsigned integer elements in Rs1 with the 16-bit
 * unsigned integer elements in Rs2. The results are first logically right-shifted by 1 bit and then
 * written to Rd.
 *
 * **Examples**:\n
 * ~~~
 * * Ra = 0x7FFF, Rb = 0x7FFF Rt = 0x7FFF
 * * Ra = 0x8000, Rb = 0x8000 Rt = 0x8000
 * * Ra = 0x4000, Rb = 0x8000 Rt = 0x6000
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = (Rs1.H[x] + Rs2.H[x]) u>> 1;
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_URADD16(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* Per halfword lane: rd.H[x] = (a.H[x] + b.H[x]) u>> 1. */
    __ASM volatile("uradd16 %[rd], %[rs1], %[rs2]"
                   : [rd] "=r"(rd)
                   : [rs1] "r"(a), [rs2] "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.175. URADD16 ===== */

/* ===== Inline Function Start for 3.176. URADD64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
 * \brief URADD64 (64-bit Unsigned Halving Addition)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * URADD64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Add two 64-bit unsigned integers. The result is halved to avoid overflow or saturation.
 *
 * **RV32 Description**:\n
 * This instruction adds the 64-bit unsigned integer of an even/odd pair of registers
 * specified by Rs1(4,1) with the 64-bit unsigned integer of an even/odd pair of registers specified by
 * Rs2(4,1). The 64-bit addition result is first logically right-shifted by 1 bit and then written to an
 * even/odd pair of registers specified by Rd(4,1).
 * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
 * of the pair contains the low 32-bit of the result.
 *
 * **RV64 Description**:\n
 * This instruction adds the 64-bit unsigned integer in Rs1 with the 64-bit unsigned
 * integer Rs2. The 64-bit addition result is first logically right-shifted by 1 bit and then written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * t_L = CONCAT(Rt(4,1),1'b0); t_H = CONCAT(Rt(4,1),1'b1);
 * a_L = CONCAT(Ra(4,1),1'b0); a_H = CONCAT(Ra(4,1),1'b1);
 * b_L = CONCAT(Rb(4,1),1'b0); b_H = CONCAT(Rb(4,1),1'b1);
 * R[t_H].R[t_L] = (R[a_H].R[a_L] + R[b_H].R[b_L]) u>> 1;
 * * RV64:
 * Rd = (Rs1 + Rs2) u>> 1;
 * ~~~
 *
 * \param [in]  a    unsigned long long type of value stored in a
 * \param [in]  b    unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_URADD64(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;

    /* 64-bit unsigned halving add: rd = (a + b) u>> 1. */
    __ASM volatile("uradd64 %[rd], %[rs1], %[rs2]"
                   : [rd] "=r"(rd)
                   : [rs1] "r"(a), [rs2] "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.176. URADD64 ===== */

/* ===== Inline Function Start for 3.177. URADDW ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
 * \brief URADDW (32-bit Unsigned Halving Addition)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * URADDW Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Add 32-bit unsigned integers and the results are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the first 32-bit unsigned integer in Rs1 with the first 32-bit
 * unsigned integer in Rs2. The result is first logically right-shifted by 1 bit and then sign-extended and
 * written to Rd.
 *
 * **Examples**:\n
 * ~~~
 * * Ra = 0x7FFFFFFF, Rb = 0x7FFFFFFF Rt = 0x7FFFFFFF
 * * Ra = 0x80000000, Rb = 0x80000000 Rt = 0x80000000
 * * Ra = 0x40000000, Rb = 0x80000000 Rt = 0x60000000
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * Rd[31:0] = (Rs1[31:0] + Rs2[31:0]) u>> 1;
 * * RV64:
 * resw[31:0] = (Rs1[31:0] + Rs2[31:0]) u>> 1;
 * Rd[63:0] = SE(resw[31:0]);
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_URADDW(unsigned int a, unsigned int b)
{
    unsigned long rd;

    /* 32-bit unsigned halving add: (a + b) u>> 1; per the operation
     * description the 32-bit result is sign-extended on RV64. */
    __ASM volatile("uraddw %[rd], %[rs1], %[rs2]"
                   : [rd] "=r"(rd)
                   : [rs1] "r"(a), [rs2] "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.177. URADDW ===== */

/* ===== Inline Function Start for 3.178. URCRAS16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief URCRAS16 (SIMD 16-bit Unsigned Halving Cross Addition & Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * URCRAS16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit unsigned integer element addition and 16-bit unsigned integer element
 * subtraction in a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit chunks.
 * The results are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs1
 * with the 16-bit unsigned integer in [15:0] of 32-bit chunks in Rs2, and subtracts the 16-bit unsigned
 * integer in [31:16] of 32-bit chunks in Rs2 from the 16-bit unsigned integer in [15:0] of 32-bit chunks
 * in Rs1. The element results are first logically right-shifted by 1 bit and then written to [31:16] of
 * 32-bit chunks in Rd and [15:0] of 32-bit chunks in Rd.
 *
 * **Examples**:\n
 * ~~~
 * Please see `URADD16` and `URSUB16` instructions.
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:16] = (Rs1.W[x][31:16] + Rs2.W[x][15:0]) u>> 1;
 * Rd.W[x][15:0] = (Rs1.W[x][15:0] - Rs2.W[x][31:16]) u>> 1;
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_URCRAS16(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* Per 32-bit chunk, crossed operands:
     *   rd[31:16] = (a[31:16] + b[15:0])  u>> 1
     *   rd[15:0]  = (a[15:0]  - b[31:16]) u>> 1 */
    __ASM volatile("urcras16 %[rd], %[rs1], %[rs2]"
                   : [rd] "=r"(rd)
                   : [rs1] "r"(a), [rs2] "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.178. URCRAS16 ===== */

/* ===== Inline Function Start for 3.179. URCRSA16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief URCRSA16 (SIMD 16-bit Unsigned Halving Cross Subtraction & Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * URCRSA16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit unsigned integer element subtraction and 16-bit unsigned integer element
 * addition in a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit chunks.
 * The results are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit unsigned integer in [15:0] of 32-bit chunks in Rs2
 * from the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs1, and adds the 16-bit unsigned
 * integer in [15:0] of 32-bit chunks in Rs1 with the 16-bit unsigned integer in [31:16] of 32-bit chunks
 * in Rs2. The two results are first logically right-shifted by 1 bit and then written to [31:16] of 32-bit
 * chunks in Rd and [15:0] of 32-bit chunks in Rd.
 *
 * **Examples**:\n
 * ~~~
 * Please see `URADD16` and `URSUB16` instructions.
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:16] = (Rs1.W[x][31:16] - Rs2.W[x][15:0]) u>> 1;
 * Rd.W[x][15:0] = (Rs1.W[x][15:0] + Rs2.W[x][31:16]) u>> 1;
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_URCRSA16(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* Per 32-bit chunk, crossed operands:
     *   rd[31:16] = (a[31:16] - b[15:0])  u>> 1
     *   rd[15:0]  = (a[15:0]  + b[31:16]) u>> 1 */
    __ASM volatile("urcrsa16 %[rd], %[rs1], %[rs2]"
                   : [rd] "=r"(rd)
                   : [rs1] "r"(a), [rs2] "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.179. URCRSA16 ===== */

/* ===== Inline Function Start for 3.180. URSTAS16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief URSTAS16 (SIMD 16-bit Unsigned Halving Straight Addition & Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * URSTAS16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit unsigned integer element addition and 16-bit unsigned integer element
 * subtraction in a 32-bit chunk simultaneously. Operands are from corresponding positions in 32-bit
 * chunks. The results are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs1
 * with the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs2, and subtracts the 16-bit unsigned
 * integer in [15:0] of 32-bit chunks in Rs2 from the 16-bit unsigned integer in [15:0] of 32-bit chunks
 * in Rs1. The element results are first logically right-shifted by 1 bit and then written to [31:16] of
 * 32-bit chunks in Rd and [15:0] of 32-bit chunks in Rd.
 *
 * **Examples**:\n
 * ~~~
 * Please see `URADD16` and `URSUB16` instructions.
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:16] = (Rs1.W[x][31:16] + Rs2.W[x][31:16]) u>> 1;
 * Rd.W[x][15:0] = (Rs1.W[x][15:0] - Rs2.W[x][15:0]) u>> 1;
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_URSTAS16(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* Per 32-bit chunk, straight (same-position) operands:
     *   rd[31:16] = (a[31:16] + b[31:16]) u>> 1
     *   rd[15:0]  = (a[15:0]  - b[15:0])  u>> 1 */
    __ASM volatile("urstas16 %[rd], %[rs1], %[rs2]"
                   : [rd] "=r"(rd)
                   : [rs1] "r"(a), [rs2] "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.180. URSTAS16 ===== */

/* ===== Inline Function Start for 3.181. URSTSA16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief URSTSA16 (SIMD 16-bit Unsigned Halving Straight Subtraction & Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * URSTSA16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit unsigned integer element subtraction and 16-bit unsigned integer element
 * addition in a 32-bit chunk simultaneously. Operands are from corresponding positions in 32-bit
 * chunks. The results are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs2
 * from the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs1, and adds the 16-bit unsigned
 * integer in [15:0] of 32-bit chunks in Rs1 with the 16-bit unsigned integer in [15:0] of 32-bit chunks in
 * Rs2. The two results are first logically right-shifted by 1 bit and then written to [31:16] of 32-bit
 * chunks in Rd and [15:0] of 32-bit chunks in Rd.
 *
 * **Examples**:\n
 * ~~~
 * Please see `URADD16` and `URSUB16` instructions.
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:16] = (Rs1.W[x][31:16] - Rs2.W[x][31:16]) u>> 1;
 * Rd.W[x][15:0] = (Rs1.W[x][15:0] + Rs2.W[x][15:0]) u>> 1;
 * for RV32, x=0
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_URSTSA16(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* Per 32-bit chunk, straight (same-position) operands:
     *   rd[31:16] = (a[31:16] - b[31:16]) u>> 1
     *   rd[15:0]  = (a[15:0]  + b[15:0])  u>> 1 */
    __ASM volatile("urstsa16 %[rd], %[rs1], %[rs2]"
                   : [rd] "=r"(rd)
                   : [rs1] "r"(a), [rs2] "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.181. URSTSA16 ===== */

/* ===== Inline Function Start for 3.182. URSUB8 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
 * \brief URSUB8 (SIMD 8-bit Unsigned Halving Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * URSUB8 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit unsigned integer element subtractions simultaneously. The results are halved to
 * avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the 8-bit unsigned integer elements in Rs2 from the 8-bit
 * unsigned integer elements in Rs1. The results are first logically right-shifted by 1 bit and then
 * written to Rd.
 *
 * **Examples**:\n
 * ~~~
 * * Ra = 0x7F, Rb = 0x80 Rt = 0xFF
 * * Ra = 0x80, Rb = 0x7F Rt = 0x00
 * * Ra = 0x80, Rb = 0x40 Rt = 0x20
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.B[x] = (Rs1.B[x] - Rs2.B[x]) u>> 1;
 * for RV32: x=3...0,
 * for RV64: x=7...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_URSUB8(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* Per byte lane: rd.B[x] = (a.B[x] - b.B[x]) u>> 1. */
    __ASM volatile("ursub8 %[rd], %[rs1], %[rs2]"
                   : [rd] "=r"(rd)
                   : [rs1] "r"(a), [rs2] "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.182. URSUB8 ===== */

/* ===== Inline Function Start for 3.183. URSUB16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
 * \brief URSUB16 (SIMD 16-bit Unsigned Halving Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * URSUB16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit unsigned integer element subtractions simultaneously. The results are halved to
 * avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit unsigned integer elements in Rs2 from the 16-bit
 * unsigned integer elements in Rs1. The results are first logically right-shifted by 1 bit and then
 * written to Rd.
 *
 * **Examples**:\n
 * ~~~
 * * Ra = 0x7FFF, Rb = 0x8000 Rt = 0xFFFF
 * * Ra = 0x8000, Rb = 0x7FFF Rt = 0x0000
 * * Ra = 0x8000, Rb = 0x4000 Rt = 0x2000
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = (Rs1.H[x] - Rs2.H[x]) u>> 1;
 * for RV32: x=1...0,
 * for RV64: x=3...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_URSUB16(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* Per halfword lane: rd.H[x] = (a.H[x] - b.H[x]) u>> 1. */
    __ASM volatile("ursub16 %[rd], %[rs1], %[rs2]"
                   : [rd] "=r"(rd)
                   : [rs1] "r"(a), [rs2] "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.183. URSUB16 ===== */

/* ===== Inline Function Start for 3.184. URSUB64 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
 * \brief URSUB64 (64-bit Unsigned Halving Subtraction)
 * \details
 * **Type**: DSP (64-bit Profile)
 *
 * **Syntax**:\n
 * ~~~
 * URSUB64 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Perform a 64-bit unsigned integer subtraction. The result is halved to avoid overflow or
 * saturation.
 *
 * **RV32 Description**:\n
 * This instruction subtracts the 64-bit unsigned integer of an even/odd pair of
 * registers specified by Rs2(4,1) from the 64-bit unsigned integer of an even/odd pair of registers
 * specified by Rs1(4,1). The subtraction result is first logically right-shifted by 1 bit and then written
 * to an even/odd pair of registers specified by Rd(4,1).
 * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
 * includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
 * of the pair contains the low 32-bit of the result.
 *
 * **RV64 Description**:\n
 * This instruction subtracts the 64-bit unsigned integer in Rs2 from the 64-bit
 * unsigned integer in Rs1. The subtraction result is first logically right-shifted by 1 bit and then
 * written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * t_L = CONCAT(Rt(4,1),1'b0); t_H = CONCAT(Rt(4,1),1'b1);
 * a_L = CONCAT(Ra(4,1),1'b0); a_H = CONCAT(Ra(4,1),1'b1);
 * b_L = CONCAT(Rb(4,1),1'b0); b_H = CONCAT(Rb(4,1),1'b1);
 * R[t_H].R[t_L] = (R[a_H].R[a_L] - R[b_H].R[b_L]) u>> 1;
 * * RV64:
 * Rd = (Rs1 - Rs2) u>> 1;
 * ~~~
 *
 * \param [in]  a    unsigned long long type of value stored in a
 * \param [in]  b    unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_URSUB64(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;

    /* 64-bit unsigned halving subtract: rd = (a - b) u>> 1. */
    __ASM volatile("ursub64 %[rd], %[rs1], %[rs2]"
                   : [rd] "=r"(rd)
                   : [rs1] "r"(a), [rs2] "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.184. URSUB64 ===== */

/* ===== Inline Function Start for 3.185. URSUBW ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
 * \brief URSUBW (32-bit Unsigned Halving Subtraction)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * URSUBW Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Subtract 32-bit unsigned integers and the result is halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the first 32-bit unsigned integer in Rs2 from the first 32-bit
 * unsigned integer in Rs1. The result is first logically right-shifted by 1 bit and then sign-extended and
 * written to Rd.
 *
 * **Examples**:\n
 * ~~~
 * * Ra = 0x7FFFFFFF, Rb = 0x80000000 Rt = 0xFFFFFFFF
 * * Ra = 0x80000000, Rb = 0x7FFFFFFF Rt = 0x00000000
 * * Ra = 0x80000000, Rb = 0x40000000 Rt = 0x20000000
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * Rd[31:0] = (Rs1[31:0] - Rs2[31:0]) u>> 1;
 * * RV64:
 * resw[31:0] = (Rs1[31:0] - Rs2[31:0]) u>> 1;
 * Rd[63:0] = SE(resw[31:0]);
 * ~~~
 *
 * \param [in]  a    unsigned int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_URSUBW(unsigned int a, unsigned int b)
{
    unsigned long rd;

    /* 32-bit unsigned halving subtract: (a - b) u>> 1; per the operation
     * description the 32-bit result is sign-extended on RV64. */
    __ASM volatile("ursubw %[rd], %[rs1], %[rs2]"
                   : [rd] "=r"(rd)
                   : [rs1] "r"(a), [rs2] "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.185. URSUBW ===== */

/* ===== Inline Function Start for 3.186. WEXTI ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
 * \brief WEXTI (Extract Word from 64-bit Immediate)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * WEXTI Rd, Rs1, #LSBloc
 * ~~~
 *
 * **Purpose**:\n
 * Extract a 32-bit word from a 64-bit value stored in an even/odd pair of registers (RV32) or
 * a register (RV64) starting from a specified immediate LSB bit position.
 *
 * **RV32 Description**:\n
 * This instruction extracts a 32-bit word from a 64-bit value of an even/odd pair of registers specified
 * by Rs1(4,1) starting from a specified immediate LSB bit position, #LSBloc. The extracted word is
 * written to Rd.
 * Rs1(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register
 * pair includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the 64-bit value and the even `2d`
 * register of the pair contains the low 32-bit of the 64-bit value.
 *
 * **RV64 Description**:\n
 * This instruction extracts a 32-bit word from a 64-bit value in Rs1 starting from a specified
 * immediate LSB bit position, #LSBloc. The extracted word is sign-extended and written to lower 32-
 * bit of Rd.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * Idx0 = CONCAT(Rs1(4,1),1'b0); Idx1 = CONCAT(Rs1(4,1),1'b1);
 * src[63:0] = Concat(R[Idx1], R[Idx0]);
 * Rd = src[31+LSBloc:LSBloc];
 * * RV64:
 * ExtractW = Rs1[31+LSBloc:LSBloc];
 * Rd = SE(ExtractW)
 * ~~~
 *
 * \param [in]  a    long long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
#define __RV_WEXTI(a, b)    \
    ({    \
        /* Evaluate the caller's expression before any other macro-local is */    \
        /* declared, so an argument that happens to be spelled `result`     */    \
        /* cannot bind to an uninitialized local of this macro.             */    \
        long long __wexti_src = (long long)(a);    \
        unsigned long __wexti_rd;    \
        /* `b` is bound with the "K" constraint, so it must be a            */    \
        /* compile-time constant usable as the #LSBloc immediate.           */    \
        __ASM volatile("wexti %[rd], %[rs1], %[lsb]"    \
                       : [rd] "=r"(__wexti_rd)    \
                       : [rs1] "r"(__wexti_src), [lsb] "K"(b));    \
        /* Value of the statement expression: the extracted word. */    \
        __wexti_rd;    \
    })
/* ===== Inline Function End for 3.186. WEXTI ===== */

/* ===== Inline Function Start for 3.187. WEXT ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
 * \brief WEXT (Extract Word from 64-bit)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * WEXT Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Extract a 32-bit word from a 64-bit value stored in an even/odd pair of registers (RV32) or
 * a register (RV64) starting from a specified LSB bit position in a register.
 *
 * **RV32 Description**:\n
 * This instruction extracts a 32-bit word from a 64-bit value of an even/odd pair of registers specified
 * by Rs1(4,1) starting from a specified LSB bit position, specified in Rs2[4:0]. The extracted word is
 * written to Rd.
 * Rs1(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register
 * pair includes register 2d and 2d+1.
 * The odd `2d+1` register of the pair contains the high 32-bit of the 64-bit value and the even `2d`
 * register of the pair contains the low 32-bit of the 64-bit value.
 *
 * **Operations**:\n
 * ~~~
 * * RV32:
 * Idx0 = CONCAT(Rs1(4,1),1'b0); Idx1 = CONCAT(Rs1(4,1),1'b1);
 * src[63:0] = Concat(R[Idx1], R[Idx0]);
 * LSBloc = Rs2[4:0];
 * Rd = src[31+LSBloc:LSBloc];
 * * RV64:
 * LSBloc = Rs2[4:0];
 * ExtractW = Rs1[31+LSBloc:LSBloc];
 * Rd = SE(ExtractW)
 * ~~~
 *
 * \param [in]  a    long long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_WEXT(long long a, unsigned int b)
{
    unsigned long rd;

    /* Extract a 32-bit word from the 64-bit value a, starting at the bit
     * position given by b[4:0]. */
    __ASM volatile("wext %[rd], %[rs1], %[rs2]"
                   : [rd] "=r"(rd)
                   : [rs1] "r"(a), [rs2] "r"(b));
    return rd;
}
/* ===== Inline Function End for 3.187. WEXT ===== */

/* ===== Inline Function Start for 3.188.1. ZUNPKD810 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
 * \brief ZUNPKD810 (Unsigned Unpacking Bytes 1 & 0)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * ZUNPKD8xy Rd, Rs1
 * xy = {10, 20, 30, 31, 32}
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte x and byte y of 32-bit chunks in a register into two 16-bit unsigned
 * halfwords of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `ZUNPKD8(x)(y)` instruction, it unpacks byte *x* and byte *y* of 32-bit chunks in Rs1 into
 * two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit
 * chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[x])
 * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[y])
 * // ZUNPKD810, x=1,y=0
 * // ZUNPKD820, x=2,y=0
 * // ZUNPKD830, x=3,y=0
 * // ZUNPKD831, x=3,y=1
 * // ZUNPKD832, x=3,y=2
 * for RV32: m=0,
 * for RV64: m=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_ZUNPKD810(unsigned long a)
{
    unsigned long rd;

    /* Per 32-bit chunk: H[1] = zero-extended byte 1, H[0] = zero-extended byte 0. */
    __ASM volatile("zunpkd810 %[rd], %[rs1]"
                   : [rd] "=r"(rd)
                   : [rs1] "r"(a));
    return rd;
}
/* ===== Inline Function End for 3.188.1. ZUNPKD810 ===== */

/* ===== Inline Function Start for 3.188.2. ZUNPKD820 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
 * \brief ZUNPKD820 (Unsigned Unpacking Bytes 2 & 0)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * ZUNPKD8xy Rd, Rs1
 * xy = {10, 20, 30, 31, 32}
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte x and byte y of 32-bit chunks in a register into two 16-bit unsigned
 * halfwords of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `ZUNPKD8(x)(y)` instruction, it unpacks byte *x* and byte *y* of 32-bit chunks in Rs1 into
 * two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit
 * chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[x])
 * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[y])
 * // ZUNPKD810, x=1,y=0
 * // ZUNPKD820, x=2,y=0
 * // ZUNPKD830, x=3,y=0
 * // ZUNPKD831, x=3,y=1
 * // ZUNPKD832, x=3,y=2
 * for RV32: m=0,
 * for RV64: m=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_ZUNPKD820(unsigned long a)
{
    unsigned long rd;

    /* Per 32-bit chunk: H[1] = zero-extended byte 2, H[0] = zero-extended byte 0. */
    __ASM volatile("zunpkd820 %[rd], %[rs1]"
                   : [rd] "=r"(rd)
                   : [rs1] "r"(a));
    return rd;
}
/* ===== Inline Function End for 3.188.2. ZUNPKD820 ===== */

/* ===== Inline Function Start for 3.188.3. ZUNPKD830 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
 * \brief ZUNPKD830 (Unsigned Unpacking Bytes 3 & 0)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * ZUNPKD8xy Rd, Rs1
 * xy = {10, 20, 30, 31, 32}
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte x and byte y of 32-bit chunks in a register into two 16-bit unsigned
 * halfwords of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `ZUNPKD8(x)(y)` instruction, it unpacks byte *x* and byte *y* of 32-bit chunks in Rs1 into
 * two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit
 * chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[x])
 * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[y])
 * // ZUNPKD810, x=1,y=0
 * // ZUNPKD820, x=2,y=0
 * // ZUNPKD830, x=3,y=0
 * // ZUNPKD831, x=3,y=1
 * // ZUNPKD832, x=3,y=2
 * for RV32: m=0,
 * for RV64: m=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_ZUNPKD830(unsigned long a)
{
    unsigned long rd;

    /* Per 32-bit chunk: H[1] = zero-extended byte 3, H[0] = zero-extended byte 0. */
    __ASM volatile("zunpkd830 %[rd], %[rs1]"
                   : [rd] "=r"(rd)
                   : [rs1] "r"(a));
    return rd;
}
/* ===== Inline Function End for 3.188.3. ZUNPKD830 ===== */

/* ===== Inline Function Start for 3.188.4. ZUNPKD831 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
 * \brief ZUNPKD831 (Unsigned Unpacking Bytes 3 & 1)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * ZUNPKD8xy Rd, Rs1
 * xy = {10, 20, 30, 31, 32}
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte x and byte y of 32-bit chunks in a register into two 16-bit unsigned
 * halfwords of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `ZUNPKD8(x)(y)` instruction, it unpacks byte *x* and byte *y* of 32-bit chunks in Rs1 into
 * two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit
 * chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[x])
 * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[y])
 * // ZUNPKD810, x=1,y=0
 * // ZUNPKD820, x=2,y=0
 * // ZUNPKD830, x=3,y=0
 * // ZUNPKD831, x=3,y=1
 * // ZUNPKD832, x=3,y=2
 * for RV32: m=0,
 * for RV64: m=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_ZUNPKD831(unsigned long a)
{
    unsigned long rd;

    /* Per 32-bit chunk: H[1] = zero-extended byte 3, H[0] = zero-extended byte 1. */
    __ASM volatile("zunpkd831 %[rd], %[rs1]"
                   : [rd] "=r"(rd)
                   : [rs1] "r"(a));
    return rd;
}
/* ===== Inline Function End for 3.188.4. ZUNPKD831 ===== */

/* ===== Inline Function Start for 3.188.5. ZUNPKD832 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
 * \brief ZUNPKD832 (Unsigned Unpacking Bytes 3 & 2)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * ZUNPKD8xy Rd, Rs1
 * xy = {10, 20, 30, 31, 32}
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte x and byte y of 32-bit chunks in a register into two 16-bit unsigned
 * halfwords of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `ZUNPKD8(x)(y)` instruction, it unpacks byte *x* and byte *y* of 32-bit chunks in Rs1 into
 * two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit
 * chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[x])
 * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[y])
 * // ZUNPKD810, x=1,y=0
 * // ZUNPKD820, x=2,y=0
 * // ZUNPKD830, x=3,y=0
 * // ZUNPKD831, x=3,y=1
 * // ZUNPKD832, x=3,y=2
 * for RV32: m=0,
 * for RV64: m=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_ZUNPKD832(unsigned long a)
{
    unsigned long rd;

    /* Per 32-bit chunk: H[1] = zero-extended byte 3, H[0] = zero-extended byte 2. */
    __ASM volatile("zunpkd832 %[rd], %[rs1]"
                   : [rd] "=r"(rd)
                   : [rs1] "r"(a));
    return rd;
}
/* ===== Inline Function End for 3.188.5. ZUNPKD832 ===== */

#if (__RISCV_XLEN == 64) || defined(__ONLY_FOR_DOXYGEN_DOCUMENT_GENERATION__)

/* ===== Inline Function Start for 4.1. ADD32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief ADD32 (SIMD 32-bit Addition)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * ADD32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit integer element additions simultaneously.
 *
 * **Description**:\n
 * This instruction adds the 32-bit integer elements in Rs1 with the 32-bit integer
 * elements in Rs2, and then writes the 32-bit element results to Rd.
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned addition.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = Rs1.W[x] + Rs2.W[x];
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_ADD32(unsigned long a, unsigned long b)
{
    /* Output-only operand %0; `a` and `b` are passed as %1 and %2. */
    unsigned long rd;

    __ASM volatile(
        "add32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );
    return rd;
}
/* ===== Inline Function End for 4.1. ADD32 ===== */

/* ===== Inline Function Start for 4.2. CRAS32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief CRAS32 (SIMD 32-bit Cross Addition & Subtraction)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * CRAS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit integer element addition and 32-bit integer element subtraction in a 64-bit
 * chunk simultaneously. Operands are from crossed 32-bit elements.
 *
 * **Description**:\n
 * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit
 * integer element in [31:0] of Rs2, and writes the result to [63:32] of Rd; at the same time, it subtracts
 * the 32-bit integer element in [63:32] of Rs2 from the 32-bit integer element in [31:0] of Rs1, and
 * writes the result to [31:0] of Rd.
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned operations.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[1] = Rs1.W[1] + Rs2.W[0];
 * Rd.W[0] = Rs1.W[0] - Rs2.W[1];
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_CRAS32(unsigned long a, unsigned long b)
{
    /* Output-only operand %0; `a` and `b` are passed as %1 and %2. */
    unsigned long rd;

    __ASM volatile(
        "cras32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );
    return rd;
}
/* ===== Inline Function End for 4.2. CRAS32 ===== */

/* ===== Inline Function Start for 4.3. CRSA32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief CRSA32 (SIMD 32-bit Cross Subtraction & Addition)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * CRSA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit integer element subtraction and 32-bit integer element addition in a 64-bit
 * chunk simultaneously. Operands are from crossed 32-bit elements.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit integer element in [31:0] of Rs2 from the 32-bit integer element
 * in [63:32] of Rs1, and writes the result to [63:32] of Rd; at the same time, it adds the 32-bit integer
 * element in [31:0] of Rs1 with the 32-bit integer element in [63:32] of Rs2, and writes the result to
 * [31:0] of Rd
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned operations.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[1] = Rs1.W[1] - Rs2.W[0];
 * Rd.W[0] = Rs1.W[0] + Rs2.W[1];
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_CRSA32(unsigned long a, unsigned long b)
{
    /* Output-only operand %0; `a` and `b` are passed as %1 and %2. */
    unsigned long rd;

    __ASM volatile(
        "crsa32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );
    return rd;
}
/* ===== Inline Function End for 4.3. CRSA32 ===== */

/* ===== Inline Function Start for 4.4. KABS32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC
 * \brief KABS32 (Scalar 32-bit Absolute Value with Saturation)
 * \details
 * **Type**: DSP (RV64 Only)
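 *
 * **Format** (bit fields as listed in the source text; bits [31:25] are not given there):\n
 * ~~~
 * | 24    20 | 19    15 | 14    12 | 11     7 | 6       0 |
 * |  KABS32  |   Rs1    |   000    |    Rd    |   GE80B   |
 * |  10010   |          |          |          |  1111111  |
 * ~~~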
 *
 * **Syntax**:\n
 * ~~~
 * KABS32 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Get the absolute value of signed 32-bit integer elements in a general register.
 *
 * **Description**:\n
 * This instruction calculates the absolute value of signed 32-bit integer elements stored
 * in Rs1. The results are written to Rd. This instruction with the minimum negative integer input of
 * 0x80000000 will produce a saturated output of maximum positive integer of 0x7fffffff and the OV
 * flag will be set to 1.
 *
 * **Operations**:\n
 * ~~~
 * if (Rs1.W[x] >= 0) {
 *   res[x] = Rs1.W[x];
 * } else {
 *   If (Rs1.W[x] == 0x80000000) {
 *     res[x] = 0x7fffffff;
 *     OV = 1;
 *   } else {
 *     res[x] = -Rs1.W[x];
 *   }
 * }
 * Rd.W[x] = res[x];
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KABS32(unsigned long a)
{
    /* Receives the destination register (asm operand %0). */
    unsigned long rd;

    /* Issue a single kabs32 with `a` as the source operand (%1). */
    __ASM volatile(
        "kabs32 %0, %1"
        : "=r"(rd)
        : "r"(a)
    );
    return rd;
}
/* ===== Inline Function End for 4.4. KABS32 ===== */

/* ===== Inline Function Start for 4.5. KADD32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief KADD32 (SIMD 32-bit Signed Saturating Addition)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KADD32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer element saturating additions simultaneously.
 *
 * **Description**:\n
 * This instruction adds the 32-bit signed integer elements in Rs1 with the 32-bit signed
 * integer elements in Rs2. If any of the results are beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1),
 * they are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rs1.W[x] + Rs2.W[x];
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KADD32(unsigned long a, unsigned long b)
{
    /* Output-only operand %0; `a` and `b` are passed as %1 and %2. */
    unsigned long rd;

    __ASM volatile(
        "kadd32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );
    return rd;
}
/* ===== Inline Function End for 4.5. KADD32 ===== */

/* ===== Inline Function Start for 4.6. KCRAS32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief KCRAS32 (SIMD 32-bit Signed Saturating Cross Addition & Subtraction)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KCRAS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer element saturating addition and 32-bit signed integer element
 * saturating subtraction in a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements.
 *
 * **Description**:\n
 * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit
 * integer element in [31:0] of Rs2; at the same time, it subtracts the 32-bit integer element in [63:32] of
 * Rs2 from the 32-bit integer element in [31:0] of Rs1. If any of the results are beyond the Q31 number
 * range (-2^31 <= Q31 <= 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated
 * results are written to [63:32] of Rd for addition and [31:0] of Rd for subtraction.
 *
 * **Operations**:\n
 * ~~~
 * res[1] = Rs1.W[1] + Rs2.W[0];
 * res[0] = Rs1.W[0] - Rs2.W[1];
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[1] = res[1];
 * Rd.W[0] = res[0];
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KCRAS32(unsigned long a, unsigned long b)
{
    /* Output-only operand %0; `a` and `b` are passed as %1 and %2. */
    unsigned long rd;

    __ASM volatile(
        "kcras32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );
    return rd;
}
/* ===== Inline Function End for 4.6. KCRAS32 ===== */

/* ===== Inline Function Start for 4.7. KCRSA32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief KCRSA32 (SIMD 32-bit Signed Saturating Cross Subtraction & Addition)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KCRSA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer element saturating subtraction and 32-bit signed integer element
 * saturating addition in a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit integer element in [31:0] of Rs2 from the 32-bit integer element
 * in [63:32] of Rs1; at the same time, it adds the 32-bit integer element in [31:0] of Rs1 with the 32-bit
 * integer element in [63:32] of Rs2. If any of the results are beyond the Q31 number range (-2^31 <= Q31
 * <= 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to
 * [63:32] of Rd for subtraction and [31:0] of Rd for addition.
 *
 * **Operations**:\n
 * ~~~
 * res[1] = Rs1.W[1] - Rs2.W[0];
 * res[0] = Rs1.W[0] + Rs2.W[1];
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[1] = res[1];
 * Rd.W[0] = res[0];
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KCRSA32(unsigned long a, unsigned long b)
{
    /* Output-only operand %0; `a` and `b` are passed as %1 and %2. */
    unsigned long rd;

    __ASM volatile(
        "kcrsa32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );
    return rd;
}
/* ===== Inline Function End for 4.7. KCRSA32 ===== */

/* ===== Inline Function Start for 4.8.1. KDMBB16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
 * \brief KDMBB16 (SIMD Signed Saturating Double Multiply B16 x B16)
 * \details
 * **Type**: SIMD (RV64 only)
 *
 * **Syntax**:\n
 * ~~~
 * KDMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
 * of the 32-bit chunks in registers and then double and saturate the Q31 results into the 32-bit chunks
 * in the destination register. If saturation happens, an overflow flag OV will be set.
 *
 * **Description**:\n
 * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
 * or bottom 16-bit Q15 content of the 32-bit portions in Rs2. The Q30 results are then doubled and
 * saturated into Q31 values. The Q31 values are then written into the 32-bit chunks in Rd. When both
 * the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated to 0x7FFFFFFF
 * and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * // KDMBB16: (x,y,z)=(0,0,0),(2,2,1)
 * // KDMBT16: (x,y,z)=(0,1,0),(2,3,1)
 * // KDMTT16: (x,y,z)=(1,1,0),(3,3,1)
 * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y];
 * If (0x8000 != aop[z] | 0x8000 != bop[z]) {
 *   Mresult[z] = aop[z] * bop[z];
 *   resQ31[z] = Mresult[z] << 1;
 * } else {
 *   resQ31[z] = 0x7FFFFFFF;
 *   OV = 1;
 * }
 * Rd.W[z] = resQ31[z];
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KDMBB16(unsigned long a, unsigned long b)
{
    /* Output-only operand %0; `a` and `b` are passed as %1 and %2. */
    unsigned long rd;

    __ASM volatile(
        "kdmbb16 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );
    return rd;
}
/* ===== Inline Function End for 4.8.1. KDMBB16 ===== */

/* ===== Inline Function Start for 4.8.2. KDMBT16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
 * \brief KDMBT16 (SIMD Signed Saturating Double Multiply B16 x T16)
 * \details
 * **Type**: SIMD (RV64 only)
 *
 * **Syntax**:\n
 * ~~~
 * KDMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
 * of the 32-bit chunks in registers and then double and saturate the Q31 results into the 32-bit chunks
 * in the destination register. If saturation happens, an overflow flag OV will be set.
 *
 * **Description**:\n
 * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
 * or bottom 16-bit Q15 content of the 32-bit portions in Rs2. The Q30 results are then doubled and
 * saturated into Q31 values. The Q31 values are then written into the 32-bit chunks in Rd. When both
 * the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated to 0x7FFFFFFF
 * and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * // KDMBB16: (x,y,z)=(0,0,0),(2,2,1)
 * // KDMBT16: (x,y,z)=(0,1,0),(2,3,1)
 * // KDMTT16: (x,y,z)=(1,1,0),(3,3,1)
 * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y];
 * If (0x8000 != aop[z] | 0x8000 != bop[z]) {
 *   Mresult[z] = aop[z] * bop[z];
 *   resQ31[z] = Mresult[z] << 1;
 * } else {
 *   resQ31[z] = 0x7FFFFFFF;
 *   OV = 1;
 * }
 * Rd.W[z] = resQ31[z];
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KDMBT16(unsigned long a, unsigned long b)
{
    /* Output-only operand %0; `a` and `b` are passed as %1 and %2. */
    unsigned long rd;

    __ASM volatile(
        "kdmbt16 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );
    return rd;
}
/* ===== Inline Function End for 4.8.2. KDMBT16 ===== */

/* ===== Inline Function Start for 4.8.3. KDMTT16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
 * \brief KDMTT16 (SIMD Signed Saturating Double Multiply T16 x T16)
 * \details
 * **Type**: SIMD (RV64 only)
 *
 * **Syntax**:\n
 * ~~~
 * KDMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
 * of the 32-bit chunks in registers and then double and saturate the Q31 results into the 32-bit chunks
 * in the destination register. If saturation happens, an overflow flag OV will be set.
 *
 * **Description**:\n
 * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
 * or bottom 16-bit Q15 content of the 32-bit portions in Rs2. The Q30 results are then doubled and
 * saturated into Q31 values. The Q31 values are then written into the 32-bit chunks in Rd. When both
 * the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated to 0x7FFFFFFF
 * and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * // KDMBB16: (x,y,z)=(0,0,0),(2,2,1)
 * // KDMBT16: (x,y,z)=(0,1,0),(2,3,1)
 * // KDMTT16: (x,y,z)=(1,1,0),(3,3,1)
 * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y];
 * If (0x8000 != aop[z] | 0x8000 != bop[z]) {
 *   Mresult[z] = aop[z] * bop[z];
 *   resQ31[z] = Mresult[z] << 1;
 * } else {
 *   resQ31[z] = 0x7FFFFFFF;
 *   OV = 1;
 * }
 * Rd.W[z] = resQ31[z];
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KDMTT16(unsigned long a, unsigned long b)
{
    /* Output-only operand %0; `a` and `b` are passed as %1 and %2. */
    unsigned long rd;

    __ASM volatile(
        "kdmtt16 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );
    return rd;
}
/* ===== Inline Function End for 4.8.3. KDMTT16 ===== */

/* ===== Inline Function Start for 4.9.1. KDMABB16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
 * \brief KDMABB16 (SIMD Signed Saturating Double Multiply Addition B16 x B16)
 * \details
 * **Type**: SIMD (RV64 only)
 *
 * **Syntax**:\n
 * ~~~
 * KDMAxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
 * of the 32-bit chunks in registers and then double and saturate the Q31 results, add the results with
 * the values of the corresponding 32-bit chunks from the destination register and write the saturated
 * addition results back into the corresponding 32-bit chunks of the destination register. If saturation
 * happens, an overflow flag OV will be set.
 *
 * **Description**:\n
 * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
 * or bottom 16-bit Q15 content of the corresponding 32-bit portions in Rs2. The Q30 results are then
 * doubled and saturated into Q31 values. The Q31 values are then added with the content of the
 * corresponding 32-bit portions of Rd. If the addition results are beyond the Q31 number range (-2^31 <=
 * Q31 <= 2^31-1), they are saturated to the range and the OV flag is set to 1. The results after saturation
 * are written back to Rd.
 * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be
 * set.
 *
 * **Operations**:\n
 * ~~~
 * // KDMABB16: (x,y,z)=(0,0,0),(2,2,1)
 * // KDMABT16: (x,y,z)=(0,1,0),(2,3,1)
 * // KDMATT16: (x,y,z)=(1,1,0),(3,3,1)
 * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y];
 * If (0x8000 != aop[z] | 0x8000 != bop[z]) {
 *   Mresult[z] = aop[z] * bop[z];
 *   resQ31[z] = Mresult[z] << 1;
 * } else {
 *   resQ31[z] = 0x7FFFFFFF;
 *   OV = 1;
 * }
 * resadd[z] = Rd.W[z] + resQ31[z];
 * if (resadd[z] > (2^31)-1) {
 *   resadd[z] = (2^31)-1;
 *   OV = 1;
 * } else if (resadd[z] < -2^31) {
 *   resadd[z] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[z] = resadd[z];
 * ~~~
 *
 * \param [in]  t    unsigned long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KDMABB16(unsigned long t, unsigned long a, unsigned long b)
{
    /* Operand %0 is read-write ("+r"): it carries t in and the result out. */
    unsigned long acc = t;

    __ASM volatile(
        "kdmabb16 %0, %1, %2"
        : "+r"(acc)
        : "r"(a), "r"(b)
    );
    return acc;
}
/* ===== Inline Function End for 4.9.1. KDMABB16 ===== */

/* ===== Inline Function Start for 4.9.2. KDMABT16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
 * \brief KDMABT16 (SIMD Signed Saturating Double Multiply Addition B16 x T16)
 * \details
 * **Type**: SIMD (RV64 only)
 *
 * **Syntax**:\n
 * ~~~
 * KDMAxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
 * of the 32-bit chunks in registers and then double and saturate the Q31 results, add the results with
 * the values of the corresponding 32-bit chunks from the destination register and write the saturated
 * addition results back into the corresponding 32-bit chunks of the destination register. If saturation
 * happens, an overflow flag OV will be set.
 *
 * **Description**:\n
 * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
 * or bottom 16-bit Q15 content of the corresponding 32-bit portions in Rs2. The Q30 results are then
 * doubled and saturated into Q31 values. The Q31 values are then added with the content of the
 * corresponding 32-bit portions of Rd. If the addition results are beyond the Q31 number range (-2^31 <=
 * Q31 <= 2^31-1), they are saturated to the range and the OV flag is set to 1. The results after saturation
 * are written back to Rd.
 * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be
 * set.
 *
 * **Operations**:\n
 * ~~~
 * // KDMABB16: (x,y,z)=(0,0,0),(2,2,1)
 * // KDMABT16: (x,y,z)=(0,1,0),(2,3,1)
 * // KDMATT16: (x,y,z)=(1,1,0),(3,3,1)
 * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y];
 * If (0x8000 != aop[z] | 0x8000 != bop[z]) {
 *   Mresult[z] = aop[z] * bop[z];
 *   resQ31[z] = Mresult[z] << 1;
 * } else {
 *   resQ31[z] = 0x7FFFFFFF;
 *   OV = 1;
 * }
 * resadd[z] = Rd.W[z] + resQ31[z];
 * if (resadd[z] > (2^31)-1) {
 *   resadd[z] = (2^31)-1;
 *   OV = 1;
 * } else if (resadd[z] < -2^31) {
 *   resadd[z] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[z] = resadd[z];
 * ~~~
 *
 * \param [in]  t    unsigned long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KDMABT16(unsigned long t, unsigned long a, unsigned long b)
{
    /* Operand %0 is read-write ("+r"): it carries t in and the result out. */
    unsigned long acc = t;

    __ASM volatile(
        "kdmabt16 %0, %1, %2"
        : "+r"(acc)
        : "r"(a), "r"(b)
    );
    return acc;
}
/* ===== Inline Function End for 4.9.2. KDMABT16 ===== */

/* ===== Inline Function Start for 4.9.3. KDMATT16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
 * \brief KDMATT16 (SIMD Signed Saturating Double Multiply Addition T16 x T16)
 * \details
 * **Type**: SIMD (RV64 only)
 *
 * **Syntax**:\n
 * ~~~
 * KDMAxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
 * of the 32-bit chunks in registers and then double and saturate the Q31 results, add the results with
 * the values of the corresponding 32-bit chunks from the destination register and write the saturated
 * addition results back into the corresponding 32-bit chunks of the destination register. If saturation
 * happens, an overflow flag OV will be set.
 *
 * **Description**:\n
 * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
 * or bottom 16-bit Q15 content of the corresponding 32-bit portions in Rs2. The Q30 results are then
 * doubled and saturated into Q31 values. The Q31 values are then added with the content of the
 * corresponding 32-bit portions of Rd. If the addition results are beyond the Q31 number range (-2^31 <=
 * Q31 <= 2^31-1), they are saturated to the range and the OV flag is set to 1. The results after saturation
 * are written back to Rd.
 * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be
 * set.
 *
 * **Operations**:\n
 * ~~~
 * // KDMABB16: (x,y,z)=(0,0,0),(2,2,1)
 * // KDMABT16: (x,y,z)=(0,1,0),(2,3,1)
 * // KDMATT16: (x,y,z)=(1,1,0),(3,3,1)
 * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y];
 * If (0x8000 != aop[z] | 0x8000 != bop[z]) {
 *   Mresult[z] = aop[z] * bop[z];
 *   resQ31[z] = Mresult[z] << 1;
 * } else {
 *   resQ31[z] = 0x7FFFFFFF;
 *   OV = 1;
 * }
 * resadd[z] = Rd.W[z] + resQ31[z];
 * if (resadd[z] > (2^31)-1) {
 *   resadd[z] = (2^31)-1;
 *   OV = 1;
 * } else if (resadd[z] < -2^31) {
 *   resadd[z] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[z] = resadd[z];
 * ~~~
 *
 * \param [in]  t    unsigned long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KDMATT16(unsigned long t, unsigned long a, unsigned long b)
{
    /* Operand %0 is read-write ("+r"): it carries t in and the result out. */
    unsigned long acc = t;

    __ASM volatile(
        "kdmatt16 %0, %1, %2"
        : "+r"(acc)
        : "r"(a), "r"(b)
    );
    return acc;
}
/* ===== Inline Function End for 4.9.3. KDMATT16 ===== */

/* ===== Inline Function Start for 4.10.1. KHMBB16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
 * \brief KHMBB16 (SIMD Signed Saturating Half Multiply B16 x B16)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KHMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
 * of the 32-bit chunks in registers and then right-shift 15 bits to turn the Q30 results into Q15
 * numbers again and saturate the Q15 results into the destination register. If saturation happens, an
 * overflow flag OV will be set.
 *
 * **Description**:\n
 * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
 * or bottom 16-bit Q15 content of the 32-bit portion in Rs2. The Q30 results are then right-shifted 15
 * bits and saturated into Q15 values. The 32-bit Q15 values are then written into the 32-bit chunks in
 * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated
 * to 0x7FFF and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * // KHMBB16: (x,y,z)=(0,0,0),(2,2,1)
 * // KHMBT16: (x,y,z)=(0,1,0),(2,3,1)
 * // KHMTT16: (x,y,z)=(1,1,0),(3,3,1)
 * aop = Rs1.H[x]; bop = Rs2.H[y];
 * If (0x8000 != aop | 0x8000 != bop) {
 *   Mresult[31:0] = aop * bop;
 *   res[15:0] = Mresult[30:15];
 * } else {
 *   res[15:0] = 0x7FFF;
 *   OV = 1;
 * }
 * Rd.W[z] = SE32(res[15:0]);
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KHMBB16(unsigned long a, unsigned long b)
{
    /* Output-only operand %0; `a` and `b` are passed as %1 and %2. */
    unsigned long rd;

    __ASM volatile(
        "khmbb16 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );
    return rd;
}
/* ===== Inline Function End for 4.10.1. KHMBB16 ===== */

/* ===== Inline Function Start for 4.10.2. KHMBT16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
 * \brief KHMBT16 (SIMD Signed Saturating Half Multiply B16 x T16)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KHMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
 * of the 32-bit chunks in registers and then right-shift 15 bits to turn the Q30 results into Q15
 * numbers again and saturate the Q15 results into the destination register. If saturation happens, an
 * overflow flag OV will be set.
 *
 * **Description**:\n
 * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
 * or bottom 16-bit Q15 content of the 32-bit portion in Rs2. The Q30 results are then right-shifted 15
 * bits and saturated into Q15 values. The 32-bit Q15 values are then written into the 32-bit chunks in
 * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated
 * to 0x7FFF and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * // KHMBB16: (x,y,z)=(0,0,0),(2,2,1)
 * // KHMBT16: (x,y,z)=(0,1,0),(2,3,1)
 * // KHMTT16: (x,y,z)=(1,1,0),(3,3,1)
 * aop = Rs1.H[x]; bop = Rs2.H[y];
 * If (0x8000 != aop | 0x8000 != bop) {
 *   Mresult[31:0] = aop * bop;
 *   res[15:0] = Mresult[30:15];
 * } else {
 *   res[15:0] = 0x7FFF;
 *   OV = 1;
 * }
 * Rd.W[z] = SE32(res[15:0]);
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KHMBT16(unsigned long a, unsigned long b)
{
    /* Output-only operand %0; `a` and `b` are passed as %1 and %2. */
    unsigned long rd;

    __ASM volatile(
        "khmbt16 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );
    return rd;
}
/* ===== Inline Function End for 4.10.2. KHMBT16 ===== */

/* ===== Inline Function Start for 4.10.3. KHMTT16 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
 * \brief KHMTT16 (SIMD Signed Saturating Half Multiply T16 x T16)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KHMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
 * of the 32-bit chunks in registers and then right-shift 15 bits to turn the Q30 results into Q15
 * numbers again and saturate the Q15 results into the destination register. If saturation happens, an
 * overflow flag OV will be set.
 *
 * **Description**:\n
 * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
 * or bottom 16-bit Q15 content of the 32-bit portion in Rs2. The Q30 results are then right-shifted 15
 * bits and saturated into Q15 values. The 32-bit Q15 values are then written into the 32-bit chunks in
 * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated
 * to 0x7FFF and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * // KHMBB16: (x,y,z)=(0,0,0),(2,2,1)
 * // KHMBT16: (x,y,z)=(0,1,0),(2,3,1)
 * // KHMTT16: (x,y,z)=(1,1,0),(3,3,1)
 * aop = Rs1.H[x]; bop = Rs2.H[y];
 * If (0x8000 != aop | 0x8000 != bop) {
 *   Mresult[31:0] = aop * bop;
 *   res[15:0] = Mresult[30:15];
 * } else {
 *   res[15:0] = 0x7FFF;
 *   OV = 1;
 * }
 * Rd.W[z] = SE32(res[15:0]);
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KHMTT16(unsigned long a, unsigned long b)
{
    /* Output-only operand %0; `a` and `b` are passed as %1 and %2. */
    unsigned long rd;

    __ASM volatile(
        "khmtt16 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)
    );
    return rd;
}
/* ===== Inline Function End for 4.10.3. KHMTT16 ===== */

/* ===== Inline Function Start for 4.11.1. KMABB32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT_ADD
 * \brief KMABB32 (Saturating Signed Multiply Bottom Words & Add)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KMABB32 Rd, Rs1, Rs2
 * KMABT32 Rd, Rs1, Rs2
 * KMATT32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit element in a register with the 32-bit element in another register
 * and add the result to the content of 64-bit data in the third register. The addition result may be
 * saturated and is written to the third register.
 * * KMABB32: rd + bottom*bottom
 * * KMABT32: rd + bottom*top
 * * KMATT32: rd + top*top
 *
 * **Description**:\n
 * For the `KMABB32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit
 * element in Rs2.
 * For the `KMABT32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit
 * element in Rs2.
 * For the `KMATT32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit
 * element in Rs2.
 * The multiplication result is added to the content of 64-bit data in Rd. If the addition result is beyond
 * the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The
 * result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed
 * integers.
 *
 * **Operations**:\n
 * ~~~
 * res = Rd + (Rs1.W[0] * Rs2.W[0]); // KMABB32
 *  res = Rd + (Rs1.W[0] * Rs2.W[1]); // KMABT32
 *  res = Rd + (Rs1.W[1] * Rs2.W[1]); // KMATT32
 *  if (res > (2^63)-1) {
 *    res = (2^63)-1;
 *    OV = 1;
 *  } else if (res < -2^63) {
 *    res = -2^63;
 *    OV = 1;
 *  }
 *  Rd = res;
 * ~~~
 *
 * **Exceptions**: None
 *
 * \param [in]  t    long type of value stored in t
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMABB32(long t, unsigned long a, unsigned long b)
{
    /* Operand %0 is read-write ("+r"): it carries t in and the result out. */
    long acc = t;

    __ASM volatile(
        "kmabb32 %0, %1, %2"
        : "+r"(acc)
        : "r"(a), "r"(b)
    );
    return acc;
}
/* ===== Inline Function End for 4.11.1. KMABB32 ===== */

/* ===== Inline Function Start for 4.11.2. KMABT32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT_ADD
 * \brief KMABT32 (Saturating Signed Multiply Bottom & Top Words & Add)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KMABB32 Rd, Rs1, Rs2
 * KMABT32 Rd, Rs1, Rs2
 * KMATT32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Signed 32-bit by 32-bit multiply whose product is accumulated into a 64-bit register,
 * with saturation of the accumulated value. The members of this instruction family differ
 * only in which 32-bit words are multiplied:
 * * KMABB32: rd + bottom*bottom
 * * KMABT32: rd + bottom*top
 * * KMATT32: rd + top*top
 *
 * **Description**:\n
 * `KMABT32` multiplies the bottom 32-bit element of Rs1 by the top 32-bit element of Rs2;
 * both elements are treated as signed integers. The product is added to the 64-bit content
 * of Rd. When the sum falls outside the Q63 range (-2^63 <= Q63 <= 2^63-1) it is clamped to
 * that range and the OV bit is set to 1. The (possibly saturated) sum is written back to Rd.
 *
 * **Operations**:\n
 * ~~~
 * res = Rd + (Rs1.W[0] * Rs2.W[1]);
 * if (res > (2^63)-1) {
 *   res = (2^63)-1;
 *   OV = 1;
 * } else if (res < -2^63) {
 *   res = -2^63;
 *   OV = 1;
 * }
 * Rd = res;
 * ~~~
 *
 * **Exceptions**: None
 *
 * In this intrinsic `t` is bound to Rd (read and written), `a` to Rs1 and `b` to Rs2.
 *
 * \param [in]  t    long type of value stored in t (64-bit accumulator input)
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type (accumulator after the operation)
 */
__STATIC_FORCEINLINE long __RV_KMABT32(long t, unsigned long a, unsigned long b)
{
    /* Rd is read and written by the instruction: seed it with the accumulator t. */
    long acc = t;
    __ASM volatile("kmabt32 %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 4.11.2. KMABT32 ===== */

/* ===== Inline Function Start for 4.11.3. KMATT32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT_ADD
 * \brief KMATT32 (Saturating Signed Multiply Top Words & Add)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KMABB32 Rd, Rs1, Rs2
 * KMABT32 Rd, Rs1, Rs2
 * KMATT32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Signed 32-bit by 32-bit multiply whose product is accumulated into a 64-bit register,
 * with saturation of the accumulated value. The members of this instruction family differ
 * only in which 32-bit words are multiplied:
 * * KMABB32: rd + bottom*bottom
 * * KMABT32: rd + bottom*top
 * * KMATT32: rd + top*top
 *
 * **Description**:\n
 * `KMATT32` multiplies the top 32-bit element of Rs1 by the top 32-bit element of Rs2;
 * both elements are treated as signed integers. The product is added to the 64-bit content
 * of Rd. When the sum falls outside the Q63 range (-2^63 <= Q63 <= 2^63-1) it is clamped to
 * that range and the OV bit is set to 1. The (possibly saturated) sum is written back to Rd.
 *
 * **Operations**:\n
 * ~~~
 * res = Rd + (Rs1.W[1] * Rs2.W[1]);
 * if (res > (2^63)-1) {
 *   res = (2^63)-1;
 *   OV = 1;
 * } else if (res < -2^63) {
 *   res = -2^63;
 *   OV = 1;
 * }
 * Rd = res;
 * ~~~
 *
 * **Exceptions**: None
 *
 * In this intrinsic `t` is bound to Rd (read and written), `a` to Rs1 and `b` to Rs2.
 *
 * \param [in]  t    long type of value stored in t (64-bit accumulator input)
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type (accumulator after the operation)
 */
__STATIC_FORCEINLINE long __RV_KMATT32(long t, unsigned long a, unsigned long b)
{
    /* Rd is read and written by the instruction: seed it with the accumulator t. */
    long acc = t;
    __ASM volatile("kmatt32 %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 4.11.3. KMATT32 ===== */

/* ===== Inline Function Start for 4.12.1. KMADA32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
 * \brief KMADA32 (Saturating Signed Multiply Two Words and Two Adds)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KMADA32 Rd, Rs1, Rs2
 * KMAXDA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Perform two signed 32-bit multiplications on the 32-bit elements of two registers, then
 * add both 64-bit products and the 64-bit content of a third register together. The sum may
 * be saturated.
 * * KMADA32: rd + top*top + bottom*bottom
 * * KMAXDA32: rd + top*bottom + bottom*top
 *
 * **Description**:\n
 * `KMADA32` multiplies the bottom 32-bit element of Rs1 by the bottom 32-bit element of Rs2,
 * and the top 32-bit element of Rs1 by the top 32-bit element of Rs2; all elements are
 * treated as signed integers. The two products are summed and added to the 64-bit content of
 * Rd. When the result falls outside the Q63 range (-2^63 <= Q63 <= 2^63-1) it is clamped to
 * that range and the OV bit is set to 1. The 64-bit result is written to Rd.
 * `KMADA32` is actually an alias of the `KMAR64` instruction.
 *
 * **Operations**:\n
 * ~~~
 * res = Rd + (Rs1.W[1] * Rs2.W[1]) + (Rs1.W[0] * Rs2.W[0]);
 * if (res > (2^63)-1) {
 *   res = (2^63)-1;
 *   OV = 1;
 * } else if (res < -2^63) {
 *   res = -2^63;
 *   OV = 1;
 * }
 * Rd = res;
 * ~~~
 *
 * In this intrinsic `t` is bound to Rd (read and written), `a` to Rs1 and `b` to Rs2.
 *
 * \param [in]  t    long type of value stored in t (64-bit accumulator input)
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type (accumulator after the operation)
 */
__STATIC_FORCEINLINE long __RV_KMADA32(long t, unsigned long a, unsigned long b)
{
    /* Rd is read and written by the instruction: seed it with the accumulator t. */
    long acc = t;
    __ASM volatile("kmada32 %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 4.12.1. KMADA32 ===== */

/* ===== Inline Function Start for 4.12.2. KMAXDA32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
 * \brief KMAXDA32 (Saturating Signed Crossed Multiply Two Words and Two Adds)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KMADA32 Rd, Rs1, Rs2
 * KMAXDA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Perform two signed 32-bit multiplications on the 32-bit elements of two registers, then
 * add both 64-bit products and the 64-bit content of a third register together. The sum may
 * be saturated.
 * * KMADA32: rd + top*top + bottom*bottom
 * * KMAXDA32: rd + top*bottom + bottom*top
 *
 * **Description**:\n
 * `KMAXDA32` multiplies the top 32-bit element of Rs1 by the bottom 32-bit element of Rs2,
 * and the bottom 32-bit element of Rs1 by the top 32-bit element of Rs2; all elements are
 * treated as signed integers. The two products are summed and added to the 64-bit content of
 * Rd. When the result falls outside the Q63 range (-2^63 <= Q63 <= 2^63-1) it is clamped to
 * that range and the OV bit is set to 1. The 64-bit result is written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * res = Rd + (Rs1.W[1] * Rs2.W[0]) + (Rs1.W[0] * Rs2.W[1]);
 * if (res > (2^63)-1) {
 *   res = (2^63)-1;
 *   OV = 1;
 * } else if (res < -2^63) {
 *   res = -2^63;
 *   OV = 1;
 * }
 * Rd = res;
 * ~~~
 *
 * In this intrinsic `t` is bound to Rd (read and written), `a` to Rs1 and `b` to Rs2.
 *
 * \param [in]  t    long type of value stored in t (64-bit accumulator input)
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type (accumulator after the operation)
 */
__STATIC_FORCEINLINE long __RV_KMAXDA32(long t, unsigned long a, unsigned long b)
{
    /* Rd is read and written by the instruction: seed it with the accumulator t. */
    long acc = t;
    __ASM volatile("kmaxda32 %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 4.12.2. KMAXDA32 ===== */

/* ===== Inline Function Start for 4.13.1. KMDA32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
 * \brief KMDA32 (Signed Multiply Two Words and Add)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KMDA32 Rd, Rs1, Rs2
 * KMXDA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Perform two signed 32-bit multiplications on the 32-bit elements of two registers and add
 * the two 64-bit products together. The sum may be saturated.
 * * KMDA32: top*top + bottom*bottom
 * * KMXDA32: top*bottom + bottom*top
 *
 * **Description**:\n
 * `KMDA32` multiplies the bottom 32-bit element of Rs1 by the bottom 32-bit element of Rs2,
 * and the top 32-bit element of Rs1 by the top 32-bit element of Rs2; all elements are
 * treated as signed integers. The two products are added and the sum is checked for
 * saturation; if saturation happens the result is 2^63-1 and the OV bit is set to 1. As the
 * operation below shows, this is the case only when both Rs1 and Rs2 hold
 * 0x8000000080000000. The final result is written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * if ((Rs1 != 0x8000000080000000) or (Rs2 != 0x8000000080000000)) {
 *   Rd = (Rs1.W[1] * Rs2.W[1]) + (Rs1.W[0] * Rs2.W[0]);
 * } else {
 *   Rd = 0x7fffffffffffffff;
 *   OV = 1;
 * }
 * ~~~
 *
 * In this intrinsic `a` is bound to Rs1, `b` to Rs2, and the return value is Rd.
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMDA32(unsigned long a, unsigned long b)
{
    long rd;
    __ASM volatile("kmda32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 4.13.1. KMDA32 ===== */

/* ===== Inline Function Start for 4.13.2. KMXDA32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
 * \brief KMXDA32 (Signed Crossed Multiply Two Words and Add)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KMDA32 Rd, Rs1, Rs2
 * KMXDA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Perform two signed 32-bit multiplications on the 32-bit elements of two registers and add
 * the two 64-bit products together. The sum may be saturated.
 * * KMDA32: top*top + bottom*bottom
 * * KMXDA32: top*bottom + bottom*top
 *
 * **Description**:\n
 * `KMXDA32` multiplies the bottom 32-bit element of Rs1 by the top 32-bit element of Rs2,
 * and the top 32-bit element of Rs1 by the bottom 32-bit element of Rs2; all elements are
 * treated as signed integers. The two products are added and the sum is checked for
 * saturation; if saturation happens the result is 2^63-1 and the OV bit is set to 1. As the
 * operation below shows, this is the case only when both Rs1 and Rs2 hold
 * 0x8000000080000000. The final result is written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * if ((Rs1 != 0x8000000080000000) or (Rs2 != 0x8000000080000000)) {
 *   Rd = (Rs1.W[1] * Rs2.W[0]) + (Rs1.W[0] * Rs2.W[1]);
 * } else {
 *   Rd = 0x7fffffffffffffff;
 *   OV = 1;
 * }
 * ~~~
 *
 * In this intrinsic `a` is bound to Rs1, `b` to Rs2, and the return value is Rd.
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_KMXDA32(unsigned long a, unsigned long b)
{
    long rd;
    __ASM volatile("kmxda32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 4.13.2. KMXDA32 ===== */

/* ===== Inline Function Start for 4.14.1. KMADS32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
 * \brief KMADS32 (Saturating Signed Multiply Two Words & Subtract & Add)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KMADS32 Rd, Rs1, Rs2
 * KMADRS32 Rd, Rs1, Rs2
 * KMAXDS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Perform two signed 32-bit multiplications on the 32-bit elements of two registers,
 * subtract one 64-bit product from the other, and add the difference to the 64-bit content
 * of a third register. The sum may be saturated.
 * * KMADS32: rd + (top*top - bottom*bottom)
 * * KMADRS32: rd + (bottom*bottom - top*top)
 * * KMAXDS32: rd + (top*bottom - bottom*top)
 *
 * **Description**:\n
 * `KMADS32` multiplies the bottom 32-bit element of Rs1 by the bottom 32-bit element of Rs2
 * and subtracts that product from the product of the top 32-bit element of Rs1 and the top
 * 32-bit element of Rs2; all elements are treated as signed integers. The difference is
 * added to the 64-bit content of Rd. When the sum falls outside the Q63 range
 * (-2^63 <= Q63 <= 2^63-1) it is clamped to that range and the OV bit is set to 1. The 64-bit
 * result after saturation is written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * res = Rd + (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]);
 * if (res > (2^63)-1) {
 *   res = (2^63)-1;
 *   OV = 1;
 * } else if (res < -2^63) {
 *   res = -2^63;
 *   OV = 1;
 * }
 * Rd = res;
 * ~~~
 *
 * In this intrinsic `t` is bound to Rd (read and written), `a` to Rs1 and `b` to Rs2.
 *
 * \param [in]  t    long type of value stored in t (64-bit accumulator input)
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type (accumulator after the operation)
 */
__STATIC_FORCEINLINE long __RV_KMADS32(long t, unsigned long a, unsigned long b)
{
    /* Rd is read and written by the instruction: seed it with the accumulator t. */
    long acc = t;
    __ASM volatile("kmads32 %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 4.14.1. KMADS32 ===== */

/* ===== Inline Function Start for 4.14.2. KMADRS32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
 * \brief KMADRS32 (Saturating Signed Multiply Two Words & Reverse Subtract & Add)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KMADS32 Rd, Rs1, Rs2
 * KMADRS32 Rd, Rs1, Rs2
 * KMAXDS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Perform two signed 32-bit multiplications on the 32-bit elements of two registers,
 * subtract one 64-bit product from the other, and add the difference to the 64-bit content
 * of a third register. The sum may be saturated.
 * * KMADS32: rd + (top*top - bottom*bottom)
 * * KMADRS32: rd + (bottom*bottom - top*top)
 * * KMAXDS32: rd + (top*bottom - bottom*top)
 *
 * **Description**:\n
 * `KMADRS32` multiplies the top 32-bit element of Rs1 by the top 32-bit element of Rs2 and
 * subtracts that product from the product of the bottom 32-bit element of Rs1 and the bottom
 * 32-bit element of Rs2; all elements are treated as signed integers. The difference is
 * added to the 64-bit content of Rd. When the sum falls outside the Q63 range
 * (-2^63 <= Q63 <= 2^63-1) it is clamped to that range and the OV bit is set to 1. The 64-bit
 * result after saturation is written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * res = Rd + (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]);
 * if (res > (2^63)-1) {
 *   res = (2^63)-1;
 *   OV = 1;
 * } else if (res < -2^63) {
 *   res = -2^63;
 *   OV = 1;
 * }
 * Rd = res;
 * ~~~
 *
 * In this intrinsic `t` is bound to Rd (read and written), `a` to Rs1 and `b` to Rs2.
 *
 * \param [in]  t    long type of value stored in t (64-bit accumulator input)
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type (accumulator after the operation)
 */
__STATIC_FORCEINLINE long __RV_KMADRS32(long t, unsigned long a, unsigned long b)
{
    /* Rd is read and written by the instruction: seed it with the accumulator t. */
    long acc = t;
    __ASM volatile("kmadrs32 %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 4.14.2. KMADRS32 ===== */

/* ===== Inline Function Start for 4.14.3. KMAXDS32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
 * \brief KMAXDS32 (Saturating Signed Crossed Multiply Two Words & Subtract & Add)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KMADS32 Rd, Rs1, Rs2
 * KMADRS32 Rd, Rs1, Rs2
 * KMAXDS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Perform two signed 32-bit multiplications on the 32-bit elements of two registers,
 * subtract one 64-bit product from the other, and add the difference to the 64-bit content
 * of a third register. The sum may be saturated.
 * * KMADS32: rd + (top*top - bottom*bottom)
 * * KMADRS32: rd + (bottom*bottom - top*top)
 * * KMAXDS32: rd + (top*bottom - bottom*top)
 *
 * **Description**:\n
 * `KMAXDS32` multiplies the bottom 32-bit element of Rs1 by the top 32-bit element of Rs2
 * and subtracts that product from the product of the top 32-bit element of Rs1 and the
 * bottom 32-bit element of Rs2; all elements are treated as signed integers. The difference
 * is added to the 64-bit content of Rd. When the sum falls outside the Q63 range
 * (-2^63 <= Q63 <= 2^63-1) it is clamped to that range and the OV bit is set to 1. The 64-bit
 * result after saturation is written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * res = Rd + (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]);
 * if (res > (2^63)-1) {
 *   res = (2^63)-1;
 *   OV = 1;
 * } else if (res < -2^63) {
 *   res = -2^63;
 *   OV = 1;
 * }
 * Rd = res;
 * ~~~
 *
 * In this intrinsic `t` is bound to Rd (read and written), `a` to Rs1 and `b` to Rs2.
 *
 * \param [in]  t    long type of value stored in t (64-bit accumulator input)
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type (accumulator after the operation)
 */
__STATIC_FORCEINLINE long __RV_KMAXDS32(long t, unsigned long a, unsigned long b)
{
    /* Rd is read and written by the instruction: seed it with the accumulator t. */
    long acc = t;
    __ASM volatile("kmaxds32 %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 4.14.3. KMAXDS32 ===== */

/* ===== Inline Function Start for 4.15.1. KMSDA32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
 * \brief KMSDA32 (Saturating Signed Multiply Two Words & Add & Subtract)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KMSDA32 Rd, Rs1, Rs2
 * KMSXDA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Perform two signed 32-bit multiplications on the 32-bit elements of two registers and
 * subtract both 64-bit products from a third register. The result may be saturated.
 * * KMSDA32: rd - top*top - bottom*bottom
 * * KMSXDA32: rd - top*bottom - bottom*top
 *
 * **Description**:\n
 * `KMSDA32` multiplies the bottom 32-bit element of Rs1 by the bottom 32-bit element of Rs2,
 * and the top 32-bit element of Rs1 by the top 32-bit element of Rs2; all elements are
 * treated as signed integers. Both 64-bit products are subtracted from the content of Rd.
 * When the result falls outside the Q63 range (-2^63 <= Q63 <= 2^63-1) it is clamped to that
 * range and the OV bit is set to 1. The result after saturation is written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * res = Rd - (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]);
 * if (res > (2^63)-1) {
 *   res = (2^63)-1;
 *   OV = 1;
 * } else if (res < -2^63) {
 *   res = -2^63;
 *   OV = 1;
 * }
 * Rd = res;
 * ~~~
 *
 * In this intrinsic `t` is bound to Rd (read and written), `a` to Rs1 and `b` to Rs2.
 *
 * \param [in]  t    long type of value stored in t (64-bit accumulator input)
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type (accumulator after the operation)
 */
__STATIC_FORCEINLINE long __RV_KMSDA32(long t, unsigned long a, unsigned long b)
{
    /* Rd is read and written by the instruction: seed it with the accumulator t. */
    long acc = t;
    __ASM volatile("kmsda32 %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 4.15.1. KMSDA32 ===== */

/* ===== Inline Function Start for 4.15.2. KMSXDA32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
 * \brief KMSXDA32 (Saturating Signed Crossed Multiply Two Words & Add & Subtract)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KMSDA32 Rd, Rs1, Rs2
 * KMSXDA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Perform two signed 32-bit multiplications on the 32-bit elements of two registers and
 * subtract both 64-bit products from a third register. The result may be saturated.
 * * KMSDA32: rd - top*top - bottom*bottom
 * * KMSXDA32: rd - top*bottom - bottom*top
 *
 * **Description**:\n
 * `KMSXDA32` multiplies the bottom 32-bit element of Rs1 by the top 32-bit element of Rs2,
 * and the top 32-bit element of Rs1 by the bottom 32-bit element of Rs2; all elements are
 * treated as signed integers. Both 64-bit products are subtracted from the content of Rd.
 * When the result falls outside the Q63 range (-2^63 <= Q63 <= 2^63-1) it is clamped to that
 * range and the OV bit is set to 1. The result after saturation is written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * res = Rd - (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]);
 * if (res > (2^63)-1) {
 *   res = (2^63)-1;
 *   OV = 1;
 * } else if (res < -2^63) {
 *   res = -2^63;
 *   OV = 1;
 * }
 * Rd = res;
 * ~~~
 *
 * In this intrinsic `t` is bound to Rd (read and written), `a` to Rs1 and `b` to Rs2.
 *
 * \param [in]  t    long type of value stored in t (64-bit accumulator input)
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type (accumulator after the operation)
 */
__STATIC_FORCEINLINE long __RV_KMSXDA32(long t, unsigned long a, unsigned long b)
{
    /* Rd is read and written by the instruction: seed it with the accumulator t. */
    long acc = t;
    __ASM volatile("kmsxda32 %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));
    return acc;
}
/* ===== Inline Function End for 4.15.2. KMSXDA32 ===== */

/* ===== Inline Function Start for 4.16. KSLL32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
 * \brief KSLL32 (SIMD 32-bit Saturating Shift Left Logical)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KSLL32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Logically left-shift every 32-bit element of a register at the same time, saturating each
 * result. The shift amount is a variable taken from a GPR.
 *
 * **Description**:\n
 * Each 32-bit data element of Rs1 is shifted left logically, with zeros filling the vacated
 * bit positions. The shift amount is given by the low-order 5 bits of the value in Rs2.
 * A shifted value greater than 2^31-1 is saturated to 2^31-1, and a shifted value smaller
 * than -2^31 is saturated to -2^31. The saturated results are written to Rd. If any
 * saturation is performed, the OV bit is set to 1.
 *
 * **Operations**:\n
 * ~~~
 * sa = Rs2[4:0];
 * if (sa != 0) {
 *   res[(31+sa):0] = Rs1.W[x] << sa;
 *   if (res > (2^31)-1) {
 *     res = 0x7fffffff; OV = 1;
 *   } else if (res < -2^31) {
 *     res = 0x80000000; OV = 1;
 *   }
 *   Rd.W[x] = res[31:0];
 * } else {
 *   Rd = Rs1;
 * }
 * for RV64: x=1...0
 * ~~~
 *
 * In this intrinsic `a` is bound to Rs1, `b` to Rs2, and the return value is Rd.
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b (shift amount)
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KSLL32(unsigned long a, unsigned int b)
{
    unsigned long rd;
    __ASM volatile("ksll32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 4.16. KSLL32 ===== */

/* ===== Inline Function Start for 4.17. KSLLI32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
 * \brief KSLLI32 (SIMD 32-bit Saturating Shift Left Logical Immediate)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KSLLI32 Rd, Rs1, imm5u
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit elements logical left shift operations with saturation simultaneously. The shift
 * amount is an immediate value.
 *
 * **Description**:\n
 * The 32-bit data elements in Rs1 are left-shifted logically. The shifted out bits are filled
 * with zero and the shift amount is specified by the imm5u constant. Any shifted value greater than
 * 2^31-1 is saturated to 2^31-1. Any shifted value smaller than -2^31 is saturated to -2^31. And the saturated
 * results are written to Rd. If any saturation is performed, set OV bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * sa = imm5u[4:0];
 * if (sa != 0) {
 *   res[(31+sa):0] = Rs1.W[x] << sa;
 *   if (res > (2^31)-1) {
 *     res = 0x7fffffff; OV = 1;
 *   } else if (res < -2^31) {
 *     res = 0x80000000; OV = 1;
 *   }
 *   Rd.W[x] = res[31:0];
 * } else {
 *   Rd = Rs1;
 * }
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
#define __RV_KSLLI32(a, b)    \
    ({    \
        /* Evaluate (a) before the output local exists, and use names a caller */    \
        /* is unlikely to pick: with a local called `result` declared first,   */    \
        /* __RV_KSLLI32(result, n) would read the uninitialized macro local    */    \
        /* instead of the caller variable.                                     */    \
        unsigned long __rv_kslli32_rs1 = (unsigned long)(a);    \
        unsigned long __rv_kslli32_rd;    \
        /* %0 = Rd, %1 = Rs1, %2 = imm5u; (b) is bound with the "K" constraint. */    \
        __ASM volatile("kslli32 %0, %1, %2" : "=r"(__rv_kslli32_rd) : "r"(__rv_kslli32_rs1), "K"(b));    \
        __rv_kslli32_rd;    \
    })
/* ===== Inline Function End for 4.17. KSLLI32 ===== */

/* ===== Inline Function Start for 4.18.1. KSLRA32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
 * \brief KSLRA32 (SIMD 32-bit Shift Left Logical with Saturation or Shift Right Arithmetic)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KSLRA32 Rd, Rs1, Rs2
 * KSLRA32.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit elements logical left (positive) or arithmetic right (negative) shift operation with
 * Q31 saturation for the left shift. The `.u` form performs additional rounding up operations for the
 * right shift.
 *
 * **Description**:\n
 * The 32-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
 * based on the value of Rs2[5:0]. Rs2[5:0] is in the signed range of [-2^5, 2^5-1]. A positive Rs2[5:0] means
 * logical left shift and a negative Rs2[5:0] means arithmetic right shift. The shift amount is the
 * absolute value of Rs2[5:0]. However, the behavior of `Rs2[5:0]==-2^5 (0x20)` is defined to be
 * equivalent to the behavior of `Rs2[5:0]==-(2^5-1) (0x21)`.
 * The left-shifted results are saturated to the 32-bit signed integer range of [-2^31, 2^31-1]. For the `.u`
 * form of the instruction, the right-shifted results are added a 1 to the most significant discarded bit
 * position for rounding effect. After the shift, saturation, or rounding, the final results are written to
 * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:6] will not affect
 * this instruction.
 *
 * **Operations**:\n
 * ~~~
 * if (Rs2[5:0] < 0) {
 *   sa = -Rs2[5:0];
 *   sa = (sa == 32)? 31 : sa;
 *   if (`.u` form) {
 *     res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1;
 *     Rd.W[x] = res[31:0];
 *   } else {
 *     Rd.W[x] = SE32(Rs1.W[x][31:sa]);
 *   }
 * } else {
 *   sa = Rs2[4:0];
 *   res[(31+sa):0] = Rs1.W[x] <<(logic) sa;
 *   if (res > (2^31)-1) {
 *     res[31:0] = 0x7fffffff; OV = 1;
 *   } else if (res < -2^31) {
 *     res[31:0] = 0x80000000; OV = 1;
 *   }
 *   Rd.W[x] = res[31:0];
 * }
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KSLRA32(unsigned long a, int b)
{
    unsigned long rd;

    /* Operand binding: %0 = rd (Rd), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("kslra32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 4.18.1. KSLRA32 ===== */

/* ===== Inline Function Start for 4.18.2. KSLRA32.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
 * \brief KSLRA32.u (SIMD 32-bit Shift Left Logical with Saturation or Rounding Shift Right Arithmetic)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KSLRA32 Rd, Rs1, Rs2
 * KSLRA32.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit elements logical left (positive) or arithmetic right (negative) shift operation with
 * Q31 saturation for the left shift. The `.u` form performs additional rounding up operations for the
 * right shift.
 *
 * **Description**:\n
 * The 32-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
 * based on the value of Rs2[5:0]. Rs2[5:0] is in the signed range of [-2^5, 2^5-1]. A positive Rs2[5:0] means
 * logical left shift and a negative Rs2[5:0] means arithmetic right shift. The shift amount is the
 * absolute value of Rs2[5:0]. However, the behavior of `Rs2[5:0]==-2^5 (0x20)` is defined to be
 * equivalent to the behavior of `Rs2[5:0]==-(2^5-1) (0x21)`.
 * The left-shifted results are saturated to the 32-bit signed integer range of [-2^31, 2^31-1]. For the `.u`
 * form of the instruction, the right-shifted results are added a 1 to the most significant discarded bit
 * position for rounding effect. After the shift, saturation, or rounding, the final results are written to
 * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:6] will not affect
 * this instruction.
 *
 * **Operations**:\n
 * ~~~
 * if (Rs2[5:0] < 0) {
 *   sa = -Rs2[5:0];
 *   sa = (sa == 32)? 31 : sa;
 *   if (`.u` form) {
 *     res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1;
 *     Rd.W[x] = res[31:0];
 *   } else {
 *     Rd.W[x] = SE32(Rs1.W[x][31:sa]);
 *   }
 * } else {
 *   sa = Rs2[4:0];
 *   res[(31+sa):0] = Rs1.W[x] <<(logic) sa;
 *   if (res > (2^31)-1) {
 *     res[31:0] = 0x7fffffff; OV = 1;
 *   } else if (res < -2^31) {
 *     res[31:0] = 0x80000000; OV = 1;
 *   }
 *   Rd.W[x] = res[31:0];
 * }
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KSLRA32_U(unsigned long a, int b)
{
    unsigned long rd;

    /* Operand binding: %0 = rd (Rd), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("kslra32.u %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 4.18.2. KSLRA32.u ===== */

/* ===== Inline Function Start for 4.19. KSTAS32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief KSTAS32 (SIMD 32-bit Signed Saturating Straight Addition & Subtraction)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KSTAS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer element saturating addition and 32-bit signed integer element
 * saturating subtraction in a 64-bit chunk simultaneously. Operands are from corresponding 32-bit
 * elements.
 *
 * **Description**:\n
 * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit
 * integer element in [63:32] of Rs2; at the same time, it subtracts the 32-bit integer element in [31:0] of
 * Rs2 from the 32-bit integer element in [31:0] of Rs1. If any of the results are beyond the Q31 number
 * range (-2^31 <= Q31 <= 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated
 * results are written to [63:32] of Rd for addition and [31:0] of Rd for subtraction.
 *
 * **Operations**:\n
 * ~~~
 * res[1] = Rs1.W[1] + Rs2.W[1];
 * res[0] = Rs1.W[0] - Rs2.W[0];
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[1] = res[1];
 * Rd.W[0] = res[0];
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KSTAS32(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* Operand binding: %0 = rd (Rd), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("kstas32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 4.19. KSTAS32 ===== */

/* ===== Inline Function Start for 4.20. KSTSA32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief KSTSA32 (SIMD 32-bit Signed Saturating Straight Subtraction & Addition)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KSTSA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer element saturating subtraction and 32-bit signed integer element
 * saturating addition in a 64-bit chunk simultaneously. Operands are from corresponding 32-bit
 * elements.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit integer element in [63:32] of Rs2 from the 32-bit integer
 * element in [63:32] of Rs1; at the same time, it adds the 32-bit integer element in [31:0] of Rs1 with
 * the 32-bit integer element in [31:0] of Rs2. If any of the results are beyond the Q31 number range (
 * -2^31 <= Q31 <= 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated results are
 * written to [63:32] of Rd for subtraction and [31:0] of Rd for addition.
 *
 * **Operations**:\n
 * ~~~
 * res[1] = Rs1.W[1] - Rs2.W[1];
 * res[0] = Rs1.W[0] + Rs2.W[0];
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[1] = res[1];
 * Rd.W[0] = res[0];
 * for RV64, x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KSTSA32(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* Operand binding: %0 = rd (Rd), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("kstsa32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 4.20. KSTSA32 ===== */

/* ===== Inline Function Start for 4.21. KSUB32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief KSUB32 (SIMD 32-bit Signed Saturating Subtraction)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * KSUB32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer elements saturating subtractions simultaneously.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit signed integer elements in Rs2 from the 32-bit
 * signed integer elements in Rs1. If any of the results are beyond the Q31 number range (-2^31 <= Q31 <=
 * 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to
 * Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rs1.W[x] - Rs2.W[x];
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_KSUB32(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* Operand binding: %0 = rd (Rd), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("ksub32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 4.21. KSUB32 ===== */

/* ===== Inline Function Start for 4.22.1. PKBB32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PACK
 * \brief PKBB32 (Pack Two 32-bit Data from Both Bottom Half)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * PKBB32 Rd, Rs1, Rs2
 * PKBT32 Rd, Rs1, Rs2
 * PKTT32 Rd, Rs1, Rs2
 * PKTB32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Pack 32-bit data from 64-bit chunks in two registers.
 * * PKBB32: bottom.bottom
 * * PKBT32: bottom.top
 * * PKTT32: top.top
 * * PKTB32: top.bottom
 *
 * **Description**:\n
 * (PKBB32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
 * (PKBT32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
 * (PKTT32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
 * (PKTB32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
 *
 * **Operations**:\n
 * ~~~
 * Rd = CONCAT(Rs1.W[0], Rs2.W[0]); // PKBB32
 * Rd = CONCAT(Rs1.W[0], Rs2.W[1]); // PKBT32
 * Rd = CONCAT(Rs1.W[1], Rs2.W[1]); // PKTT32
 * Rd = CONCAT(Rs1.W[1], Rs2.W[0]); // PKTB32
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_PKBB32(unsigned long a, unsigned long b)
{
    unsigned long packed;

    /* Operand binding: %0 = packed (Rd), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("pkbb32 %0, %1, %2"
                   : "=r"(packed)
                   : "r"(a), "r"(b));
    return packed;
}
/* ===== Inline Function End for 4.22.1. PKBB32 ===== */

/* ===== Inline Function Start for 4.22.2. PKBT32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PACK
 * \brief PKBT32 (Pack Two 32-bit Data from Bottom and Top Half)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * PKBB32 Rd, Rs1, Rs2
 * PKBT32 Rd, Rs1, Rs2
 * PKTT32 Rd, Rs1, Rs2
 * PKTB32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Pack 32-bit data from 64-bit chunks in two registers.
 * * PKBB32: bottom.bottom
 * * PKBT32: bottom.top
 * * PKTT32: top.top
 * * PKTB32: top.bottom
 *
 * **Description**:\n
 * (PKBB32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
 * (PKBT32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
 * (PKTT32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
 * (PKTB32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
 *
 * **Operations**:\n
 * ~~~
 * Rd = CONCAT(Rs1.W[0], Rs2.W[0]); // PKBB32
 * Rd = CONCAT(Rs1.W[0], Rs2.W[1]); // PKBT32
 * Rd = CONCAT(Rs1.W[1], Rs2.W[1]); // PKTT32
 * Rd = CONCAT(Rs1.W[1], Rs2.W[0]); // PKTB32
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_PKBT32(unsigned long a, unsigned long b)
{
    unsigned long packed;

    /* Operand binding: %0 = packed (Rd), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("pkbt32 %0, %1, %2"
                   : "=r"(packed)
                   : "r"(a), "r"(b));
    return packed;
}
/* ===== Inline Function End for 4.22.2. PKBT32 ===== */

/* ===== Inline Function Start for 4.22.3. PKTT32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PACK
 * \brief PKTT32 (Pack Two 32-bit Data from Both Top Half)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * PKBB32 Rd, Rs1, Rs2
 * PKBT32 Rd, Rs1, Rs2
 * PKTT32 Rd, Rs1, Rs2
 * PKTB32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Pack 32-bit data from 64-bit chunks in two registers.
 * * PKBB32: bottom.bottom
 * * PKBT32: bottom.top
 * * PKTT32: top.top
 * * PKTB32: top.bottom
 *
 * **Description**:\n
 * (PKBB32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
 * (PKBT32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
 * (PKTT32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
 * (PKTB32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
 *
 * **Operations**:\n
 * ~~~
 * Rd = CONCAT(Rs1.W[0], Rs2.W[0]); // PKBB32
 * Rd = CONCAT(Rs1.W[0], Rs2.W[1]); // PKBT32
 * Rd = CONCAT(Rs1.W[1], Rs2.W[1]); // PKTT32
 * Rd = CONCAT(Rs1.W[1], Rs2.W[0]); // PKTB32
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_PKTT32(unsigned long a, unsigned long b)
{
    unsigned long packed;

    /* Operand binding: %0 = packed (Rd), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("pktt32 %0, %1, %2"
                   : "=r"(packed)
                   : "r"(a), "r"(b));
    return packed;
}
/* ===== Inline Function End for 4.22.3. PKTT32 ===== */

/* ===== Inline Function Start for 4.22.4. PKTB32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PACK
 * \brief PKTB32 (Pack Two 32-bit Data from Top and Bottom Half)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * PKBB32 Rd, Rs1, Rs2
 * PKBT32 Rd, Rs1, Rs2
 * PKTT32 Rd, Rs1, Rs2
 * PKTB32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Pack 32-bit data from 64-bit chunks in two registers.
 * * PKBB32: bottom.bottom
 * * PKBT32: bottom.top
 * * PKTT32: top.top
 * * PKTB32: top.bottom
 *
 * **Description**:\n
 * (PKBB32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
 * (PKBT32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
 * (PKTT32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
 * (PKTB32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
 *
 * **Operations**:\n
 * ~~~
 * Rd = CONCAT(Rs1.W[0], Rs2.W[0]); // PKBB32
 * Rd = CONCAT(Rs1.W[0], Rs2.W[1]); // PKBT32
 * Rd = CONCAT(Rs1.W[1], Rs2.W[1]); // PKTT32
 * Rd = CONCAT(Rs1.W[1], Rs2.W[0]); // PKTB32
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_PKTB32(unsigned long a, unsigned long b)
{
    unsigned long packed;

    /* Operand binding: %0 = packed (Rd), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("pktb32 %0, %1, %2"
                   : "=r"(packed)
                   : "r"(a), "r"(b));
    return packed;
}
/* ===== Inline Function End for 4.22.4. PKTB32 ===== */

/* ===== Inline Function Start for 4.23. RADD32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief RADD32 (SIMD 32-bit Signed Halving Addition)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * RADD32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer element additions simultaneously. The results are halved to avoid
 * overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the 32-bit signed integer elements in Rs1 with the 32-bit signed
 * integer elements in Rs2. The results are first arithmetically right-shifted by 1 bit and then written to
 * Rd.
 *
 * **Examples**:\n
 * ~~~
 * * Rs1 = 0x7FFFFFFF, Rs2 = 0x7FFFFFFF Rd = 0x7FFFFFFF
 * * Rs1 = 0x80000000, Rs2 = 0x80000000 Rd = 0x80000000
 * * Rs1 = 0x40000000, Rs2 = 0x80000000 Rd = 0xE0000000
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = (Rs1.W[x] + Rs2.W[x]) s>> 1;
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_RADD32(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* Operand binding: %0 = rd (Rd), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("radd32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 4.23. RADD32 ===== */

/* ===== Inline Function Start for 4.24. RCRAS32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief RCRAS32 (SIMD 32-bit Signed Halving Cross Addition & Subtraction)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * RCRAS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer element addition and 32-bit signed integer element subtraction in
 * a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements. The results are halved to
 * avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the 32-bit signed integer element in [63:32] of Rs1 with the 32-bit
 * signed integer element in [31:0] of Rs2, and subtracts the 32-bit signed integer element in [63:32] of
 * Rs2 from the 32-bit signed integer element in [31:0] of Rs1. The element results are first
 * arithmetically right-shifted by 1 bit and then written to [63:32] of Rd for addition and [31:0] of Rd
 * for subtraction.
 *
 * **Examples**:\n
 * ~~~
 * Please see `RADD32` and `RSUB32` instructions.
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[1] = (Rs1.W[1] + Rs2.W[0]) s>> 1;
 * Rd.W[0] = (Rs1.W[0] - Rs2.W[1]) s>> 1;
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_RCRAS32(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* Operand binding: %0 = rd (Rd), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("rcras32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 4.24. RCRAS32 ===== */

/* ===== Inline Function Start for 4.25. RCRSA32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief RCRSA32 (SIMD 32-bit Signed Halving Cross Subtraction & Addition)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * RCRSA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer element subtraction and 32-bit signed integer element addition in
 * a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements. The results are halved to
 * avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit signed integer element in [31:0] of Rs2 from the
 * 32-bit signed integer element in [63:32] of Rs1, and adds the 32-bit signed element integer in [31:0]
 * of Rs1 with the 32-bit signed integer element in [63:32] of Rs2. The two results are first
 * arithmetically right-shifted by 1 bit and then written to [63:32] of Rd for subtraction and [31:0] of
 * Rd for addition.
 *
 * **Examples**:\n
 * ~~~
 * Please see `RADD32` and `RSUB32` instructions.
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[1] = (Rs1.W[1] - Rs2.W[0]) s>> 1;
 * Rd.W[0] = (Rs1.W[0] + Rs2.W[1]) s>> 1;
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_RCRSA32(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* Operand binding: %0 = rd (Rd), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("rcrsa32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 4.25. RCRSA32 ===== */

/* ===== Inline Function Start for 4.26. RSTAS32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief RSTAS32 (SIMD 32-bit Signed Halving Straight Addition & Subtraction)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * RSTAS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer element addition and 32-bit signed integer element subtraction in
 * a 64-bit chunk simultaneously. Operands are from corresponding 32-bit elements. The results are
 * halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the 32-bit signed integer element in [63:32] of Rs1 with the 32-bit
 * signed integer element in [63:32] of Rs2, and subtracts the 32-bit signed integer element in [31:0] of
 * Rs2 from the 32-bit signed integer element in [31:0] of Rs1. The element results are first
 * arithmetically right-shifted by 1 bit and then written to [63:32] of Rd for addition and [31:0] of Rd
 * for subtraction.
 *
 * **Examples**:\n
 * ~~~
 * Please see `RADD32` and `RSUB32` instructions.
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[1] = (Rs1.W[1] + Rs2.W[1]) s>> 1;
 * Rd.W[0] = (Rs1.W[0] - Rs2.W[0]) s>> 1;
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_RSTAS32(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* Operand binding: %0 = rd (Rd), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("rstas32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 4.26. RSTAS32 ===== */

/* ===== Inline Function Start for 4.27. RSTSA32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief RSTSA32 (SIMD 32-bit Signed Halving Straight Subtraction & Addition)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * RSTSA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer element subtraction and 32-bit signed integer element addition in
 * a 64-bit chunk simultaneously. Operands are from corresponding 32-bit elements. The results are
 * halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit signed integer element in [63:32] of Rs2 from the
 * 32-bit signed integer element in [63:32] of Rs1, and adds the 32-bit signed element integer in [31:0]
 * of Rs1 with the 32-bit signed integer element in [31:0] of Rs2. The two results are first arithmetically
 * right-shifted by 1 bit and then written to [63:32] of Rd for subtraction and [31:0] of Rd for addition.
 *
 * **Examples**:\n
 * ~~~
 * Please see `RADD32` and `RSUB32` instructions.
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[1] = (Rs1.W[1] - Rs2.W[1]) s>> 1;
 * Rd.W[0] = (Rs1.W[0] + Rs2.W[0]) s>> 1;
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_RSTSA32(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* Operand binding: %0 = rd (Rd), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("rstsa32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 4.27. RSTSA32 ===== */

/* ===== Inline Function Start for 4.28. RSUB32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief RSUB32 (SIMD 32-bit Signed Halving Subtraction)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * RSUB32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer element subtractions simultaneously. The results are halved to
 * avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit signed integer elements in Rs2 from the 32-bit
 * signed integer elements in Rs1. The results are first arithmetically right-shifted by 1 bit and then
 * written to Rd.
 *
 * **Examples**:\n
 * ~~~
 * * Rs1 = 0x7FFFFFFF, Rs2 = 0x80000000 Rd = 0x7FFFFFFF
 * * Rs1 = 0x80000000, Rs2 = 0x7FFFFFFF Rd = 0x80000000
 * * Rs1 = 0x80000000, Rs2 = 0x40000000 Rd = 0xA0000000
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = (Rs1.W[x] - Rs2.W[x]) s>> 1;
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_RSUB32(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* Operand binding: %0 = rd (Rd), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("rsub32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 4.28. RSUB32 ===== */

/* ===== Inline Function Start for 4.29. SLL32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
 * \brief SLL32 (SIMD 32-bit Shift Left Logical)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * SLL32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit elements logical left shift operations simultaneously. The shift amount is a
 * variable from a GPR.
 *
 * **Description**:\n
 * The 32-bit elements in Rs1 are left-shifted logically. And the results are written to Rd.
 * The shifted out bits are filled with zero and the shift amount is specified by the low-order 5-bits of
 * the value in the Rs2 register.
 *
 * **Operations**:\n
 * ~~~
 * sa = Rs2[4:0];
 * Rd.W[x] = Rs1.W[x] << sa;
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SLL32(unsigned long a, unsigned int b)
{
    unsigned long rd;

    /* Operand binding: %0 = rd (Rd), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("sll32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 4.29. SLL32 ===== */

/* ===== Inline Function Start for 4.30. SLLI32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
 * \brief SLLI32 (SIMD 32-bit Shift Left Logical Immediate)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * SLLI32 Rd, Rs1, imm5u[4:0]
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit element logical left shift operations simultaneously. The shift amount is an
 * immediate value.
 *
 * **Description**:\n
 * The 32-bit elements in Rs1 are left-shifted logically. The shifted out bits are filled with
 * zero and the shift amount is specified by the imm5u[4:0] constant. And the results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = imm5u[4:0];
 * Rd.W[x] = Rs1.W[x] << sa;
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
#define __RV_SLLI32(a, b)    \
    ({    \
        /* Evaluate the argument before result is declared, so that a */    \
        /* caller expression naming its own result is not captured.   */    \
        unsigned long __a = (unsigned long)(a);    \
        unsigned long result;    \
        /* "K" makes b an immediate operand: it must be a constant (imm5u). */    \
        __ASM volatile("slli32 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b));    \
        result;    \
    })
/* ===== Inline Function End for 4.30. SLLI32 ===== */

/* ===== Inline Function Start for 4.31. SMAX32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC
 * \brief SMAX32 (SIMD 32-bit Signed Maximum)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * SMAX32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer elements finding maximum operations simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 32-bit signed integer elements in Rs1 with the 32-bit
 * signed integer elements in Rs2 and selects the numbers that is greater than the other one. The
 * selected results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = (Rs1.W[x] > Rs2.W[x])? Rs1.W[x] : Rs2.W[x];
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SMAX32(unsigned long a, unsigned long b)
{
    /* Receives the destination register of the smax32 instruction. */
    unsigned long rd;

    /* %1 = a and %2 = b are the two source registers being compared. */
    __ASM volatile("smax32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 4.31. SMAX32 ===== */

/* ===== Inline Function Start for 4.32.1. SMBB32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT
 * \brief SMBB32 (Signed Multiply Bottom Word & Bottom Word)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * SMBB32 Rd, Rs1, Rs2
 * SMBT32 Rd, Rs1, Rs2
 * SMTT32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit element of a register with the signed 32-bit element of another
 * register and write the 64-bit result to a third register.
 * * SMBB32: bottom*bottom
 * * SMBT32: bottom*top
 * * SMTT32: top*top
 *
 * **Description**:\n
 * For the `SMBB32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
 * element of Rs2. It is actually an alias of `MULSR64` instruction.
 * For the `SMBT32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
 * element of Rs2.
 * For the `SMTT32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit element
 * of Rs2.
 * The 64-bit multiplication result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as
 * signed integers.
 *
 * **Operations**:\n
 * ~~~
 * res = Rs1.W[0] * Rs2.W[0]; // SMBB32
 * res = Rs1.W[0] * Rs2.W[1]; // SMBT32
 * res = Rs1.W[1] * Rs2.W[1]; // SMTT32
 * Rd = res;
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_SMBB32(unsigned long a, unsigned long b)
{
    /* Signed destination of the smbb32 instruction. */
    long product;

    /* %1 = a, %2 = b: source registers for the multiply. */
    __ASM volatile("smbb32 %0, %1, %2"
                   : "=r"(product)
                   : "r"(a), "r"(b));

    return product;
}
/* ===== Inline Function End for 4.32.1. SMBB32 ===== */

/* ===== Inline Function Start for 4.32.2. SMBT32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT
 * \brief SMBT32 (Signed Multiply Bottom Word & Top Word)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * SMBB32 Rd, Rs1, Rs2
 * SMBT32 Rd, Rs1, Rs2
 * SMTT32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit element of a register with the signed 32-bit element of another
 * register and write the 64-bit result to a third register.
 * * SMBB32: bottom*bottom
 * * SMBT32: bottom*top
 * * SMTT32: top*top
 *
 * **Description**:\n
 * For the `SMBB32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
 * element of Rs2. It is actually an alias of `MULSR64` instruction.
 * For the `SMBT32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
 * element of Rs2.
 * For the `SMTT32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit element
 * of Rs2.
 * The 64-bit multiplication result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as
 * signed integers.
 *
 * **Operations**:\n
 * ~~~
 * res = Rs1.W[0] * Rs2.W[0]; // SMBB32
 * res = Rs1.W[0] * Rs2.W[1]; // SMBT32
 * res = Rs1.W[1] * Rs2.W[1]; // SMTT32
 * Rd = res;
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_SMBT32(unsigned long a, unsigned long b)
{
    /* Signed destination of the smbt32 instruction. */
    long product;

    /* %1 = a, %2 = b: source registers for the multiply. */
    __ASM volatile("smbt32 %0, %1, %2"
                   : "=r"(product)
                   : "r"(a), "r"(b));

    return product;
}
/* ===== Inline Function End for 4.32.2. SMBT32 ===== */

/* ===== Inline Function Start for 4.32.3. SMTT32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT
 * \brief SMTT32 (Signed Multiply Top Word & Top Word)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * SMBB32 Rd, Rs1, Rs2
 * SMBT32 Rd, Rs1, Rs2
 * SMTT32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit element of a register with the signed 32-bit element of another
 * register and write the 64-bit result to a third register.
 * * SMBB32: bottom*bottom
 * * SMBT32: bottom*top
 * * SMTT32: top*top
 *
 * **Description**:\n
 * For the `SMBB32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
 * element of Rs2. It is actually an alias of `MULSR64` instruction.
 * For the `SMBT32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
 * element of Rs2.
 * For the `SMTT32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit element
 * of Rs2.
 * The 64-bit multiplication result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as
 * signed integers.
 *
 * **Operations**:\n
 * ~~~
 * res = Rs1.W[0] * Rs2.W[0]; // SMBB32
 * res = Rs1.W[0] * Rs2.W[1]; // SMBT32
 * res = Rs1.W[1] * Rs2.W[1]; // SMTT32
 * Rd = res;
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_SMTT32(unsigned long a, unsigned long b)
{
    /* Signed destination of the smtt32 instruction. */
    long product;

    /* %1 = a, %2 = b: source registers for the multiply. */
    __ASM volatile("smtt32 %0, %1, %2"
                   : "=r"(product)
                   : "r"(a), "r"(b));

    return product;
}
/* ===== Inline Function End for 4.32.3. SMTT32 ===== */

/* ===== Inline Function Start for 4.33.1. SMDS32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
 * \brief SMDS32 (Signed Multiply Two Words and Subtract)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * SMDS32 Rd, Rs1, Rs2
 * SMDRS32 Rd, Rs1, Rs2
 * SMXDS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 32-bit multiplications from the 32-bit elements of two registers; and then
 * perform a subtraction operation between the two 64-bit results.
 * * SMDS32: top*top - bottom*bottom
 * * SMDRS32: bottom*bottom - top*top
 * * SMXDS32: top*bottom - bottom*top
 *
 * **Description**:\n
 * For the `SMDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
 * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
 * Rs1 with the top 32-bit element of Rs2.
 * For the `SMDRS32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit
 * element of Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit
 * element of Rs1 with the bottom 32-bit element of Rs2.
 * For the `SMXDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
 * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
 * Rs1 with the bottom 32-bit element of Rs2.
 * The subtraction result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed
 * integers.
 *
 * **Operations**:\n
 * ~~~
 * Rd = (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // SMDS32
 * Rd = (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // SMDRS32
 * Rd = (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // SMXDS32
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_SMDS32(unsigned long a, unsigned long b)
{
    /* Signed destination of the smds32 instruction. */
    long diff;

    /* %1 = a, %2 = b: the two packed source registers. */
    __ASM volatile("smds32 %0, %1, %2"
                   : "=r"(diff)
                   : "r"(a), "r"(b));

    return diff;
}
/* ===== Inline Function End for 4.33.1. SMDS32 ===== */

/* ===== Inline Function Start for 4.33.2. SMDRS32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
 * \brief SMDRS32 (Signed Multiply Two Words and Reverse Subtract)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * SMDS32 Rd, Rs1, Rs2
 * SMDRS32 Rd, Rs1, Rs2
 * SMXDS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 32-bit multiplications from the 32-bit elements of two registers; and then
 * perform a subtraction operation between the two 64-bit results.
 * * SMDS32: top*top - bottom*bottom
 * * SMDRS32: bottom*bottom - top*top
 * * SMXDS32: top*bottom - bottom*top
 *
 * **Description**:\n
 * For the `SMDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
 * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
 * Rs1 with the top 32-bit element of Rs2.
 * For the `SMDRS32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit
 * element of Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit
 * element of Rs1 with the bottom 32-bit element of Rs2.
 * For the `SMXDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
 * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
 * Rs1 with the bottom 32-bit element of Rs2.
 * The subtraction result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed
 * integers.
 *
 * **Operations**:\n
 * ~~~
 * Rd = (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // SMDS32
 * Rd = (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // SMDRS32
 * Rd = (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // SMXDS32
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_SMDRS32(unsigned long a, unsigned long b)
{
    /* Signed destination of the smdrs32 instruction. */
    long diff;

    /* %1 = a, %2 = b: the two packed source registers. */
    __ASM volatile("smdrs32 %0, %1, %2"
                   : "=r"(diff)
                   : "r"(a), "r"(b));

    return diff;
}
/* ===== Inline Function End for 4.33.2. SMDRS32 ===== */

/* ===== Inline Function Start for 4.33.3. SMXDS32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
 * \brief SMXDS32 (Signed Crossed Multiply Two Words and Subtract)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * SMDS32 Rd, Rs1, Rs2
 * SMDRS32 Rd, Rs1, Rs2
 * SMXDS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 32-bit multiplications from the 32-bit elements of two registers; and then
 * perform a subtraction operation between the two 64-bit results.
 * * SMDS32: top*top - bottom*bottom
 * * SMDRS32: bottom*bottom - top*top
 * * SMXDS32: top*bottom - bottom*top
 *
 * **Description**:\n
 * For the `SMDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
 * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
 * Rs1 with the top 32-bit element of Rs2.
 * For the `SMDRS32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit
 * element of Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit
 * element of Rs1 with the bottom 32-bit element of Rs2.
 * For the `SMXDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
 * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
 * Rs1 with the bottom 32-bit element of Rs2.
 * The subtraction result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed
 * integers.
 *
 * **Operations**:\n
 * ~~~
 * Rd = (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // SMDS32
 * Rd = (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // SMDRS32
 * Rd = (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // SMXDS32
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_SMXDS32(unsigned long a, unsigned long b)
{
    /* Signed destination of the smxds32 instruction. */
    long diff;

    /* %1 = a, %2 = b: the two packed source registers. */
    __ASM volatile("smxds32 %0, %1, %2"
                   : "=r"(diff)
                   : "r"(a), "r"(b));

    return diff;
}
/* ===== Inline Function End for 4.33.3. SMXDS32 ===== */

/* ===== Inline Function Start for 4.34. SMIN32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC
 * \brief SMIN32 (SIMD 32-bit Signed Minimum)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * SMIN32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer elements finding minimum operations simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 32-bit signed integer elements in Rs1 with the 32-bit
 * signed integer elements in Rs2 and selects the numbers that is less than the other one. The selected
 * results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = (Rs1.W[x] < Rs2.W[x])? Rs1.W[x] : Rs2.W[x];
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SMIN32(unsigned long a, unsigned long b)
{
    /* Receives the destination register of the smin32 instruction. */
    unsigned long rd;

    /* %1 = a and %2 = b are the two source registers being compared. */
    __ASM volatile("smin32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 4.34. SMIN32 ===== */

/* ===== Inline Function Start for 4.35.1. SRA32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
 * \brief SRA32 (SIMD 32-bit Shift Right Arithmetic)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * SRA32 Rd, Rs1, Rs2
 * SRA32.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit element arithmetic right shift operations simultaneously. The shift amount is a
 * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
 * results.
 *
 * **Description**:\n
 * The 32-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
 * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order
 * 5-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is
 * added to the most significant discarded bit of each 32-bit data element to calculate the final results.
 * And the results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = Rs2[4:0];
 * if (sa > 0) {
 *   if (`.u` form) { // SRA32.u
 *     res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1;
 *     Rd.W[x] = res[31:0];
 *   } else { // SRA32
 *     Rd.W[x] = SE32(Rs1.W[x][31:sa])
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SRA32(unsigned long a, unsigned int b)
{
    /* Receives the destination register of the sra32 instruction. */
    unsigned long rd;

    /* %1 = a (packed 32-bit elements), %2 = b (shift amount register). */
    __ASM volatile("sra32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 4.35.1. SRA32 ===== */

/* ===== Inline Function Start for 4.35.2. SRA32.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
 * \brief SRA32.u (SIMD 32-bit Rounding Shift Right Arithmetic)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * SRA32 Rd, Rs1, Rs2
 * SRA32.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit element arithmetic right shift operations simultaneously. The shift amount is a
 * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
 * results.
 *
 * **Description**:\n
 * The 32-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
 * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order
 * 5-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is
 * added to the most significant discarded bit of each 32-bit data element to calculate the final results.
 * And the results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = Rs2[4:0];
 * if (sa > 0) {
 *   if (`.u` form) { // SRA32.u
 *     res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1;
 *     Rd.W[x] = res[31:0];
 *   } else { // SRA32
 *     Rd.W[x] = SE32(Rs1.W[x][31:sa])
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SRA32_U(unsigned long a, unsigned int b)
{
    /* Receives the destination register of the sra32.u instruction. */
    unsigned long rd;

    /* %1 = a (packed 32-bit elements), %2 = b (shift amount register). */
    __ASM volatile("sra32.u %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 4.35.2. SRA32.u ===== */

/* ===== Inline Function Start for 4.36.1. SRAI32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
 * \brief SRAI32 (SIMD 32-bit Shift Right Arithmetic Immediate)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * SRAI32 Rd, Rs1, imm5u
 * SRAI32.u Rd, Rs1, imm5u
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit elements arithmetic right shift operations simultaneously. The shift amount is
 * an immediate value. The `.u` form performs additional rounding up operations on the shifted
 * results.
 *
 * **Description**:\n
 * The 32-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
 * bits are filled with the sign-bit of the 32-bit data elements. The shift amount is specified by the
 * imm5u constant. For the rounding operation of the `.u` form, a value of 1 is added to the most
 * significant discarded bit of each 32-bit data to calculate the final results. And the results are written
 * to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = imm5u[4:0];
 * if (sa > 0) {
 *   if (`.u` form) { // SRAI32.u
 *     res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1;
 *     Rd.W[x] = res[31:0];
 *   } else { // SRAI32
 *     Rd.W[x] = SE32(Rs1.W[x][31:sa]);
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
#define __RV_SRAI32(a, b)    \
    ({    \
        /* Evaluate the argument before result is declared, so that a */    \
        /* caller expression naming its own result is not captured.   */    \
        unsigned long __a = (unsigned long)(a);    \
        unsigned long result;    \
        /* "K" makes b an immediate operand: it must be a constant (imm5u). */    \
        __ASM volatile("srai32 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b));    \
        result;    \
    })
/* ===== Inline Function End for 4.36.1. SRAI32 ===== */

/* ===== Inline Function Start for 4.36.2. SRAI32.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
 * \brief SRAI32.u (SIMD 32-bit Rounding Shift Right Arithmetic Immediate)
 * \details
 * **Type**: DSP (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * SRAI32 Rd, Rs1, imm5u
 * SRAI32.u Rd, Rs1, imm5u
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit elements arithmetic right shift operations simultaneously. The shift amount is
 * an immediate value. The `.u` form performs additional rounding up operations on the shifted
 * results.
 *
 * **Description**:\n
 * The 32-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
 * bits are filled with the sign-bit of the 32-bit data elements. The shift amount is specified by the
 * imm5u constant. For the rounding operation of the `.u` form, a value of 1 is added to the most
 * significant discarded bit of each 32-bit data to calculate the final results. And the results are written
 * to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = imm5u[4:0];
 * if (sa > 0) {
 *   if (`.u` form) { // SRAI32.u
 *     res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1;
 *     Rd.W[x] = res[31:0];
 *   } else { // SRAI32
 *     Rd.W[x] = SE32(Rs1.W[x][31:sa]);
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
#define __RV_SRAI32_U(a, b)    \
    ({    \
        /* Evaluate the argument before result is declared, so that a */    \
        /* caller expression naming its own result is not captured.   */    \
        unsigned long __a = (unsigned long)(a);    \
        unsigned long result;    \
        /* "K" makes b an immediate operand: it must be a constant (imm5u). */    \
        __ASM volatile("srai32.u %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b));    \
        result;    \
    })
/* ===== Inline Function End for 4.36.2. SRAI32.u ===== */

/* ===== Inline Function Start for 4.37. SRAIW.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_NON_SIMD_32B_SHIFT
 * \brief SRAIW.u (Rounding Shift Right Arithmetic Immediate Word)
 * \details
 * **Type**: DSP (RV64 only)
 *
 * **Syntax**:\n
 * ~~~
 * SRAIW.u Rd, Rs1, imm5u
 * ~~~
 *
 * **Purpose**:\n
 * Perform a 32-bit arithmetic right shift operation with rounding. The shift amount is an
 * immediate value.
 *
 * **Description**:\n
 * This instruction right-shifts the lower 32-bit content of Rs1 arithmetically. The shifted
 * out bits are filled with the sign-bit Rs1(31) and the shift amount is specified by the imm5u constant.
 * For the rounding operation, a value of 1 is added to the most significant discarded bit of the data to
 * calculate the final result. And the result is sign-extended and written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = imm5u;
 * if (sa != 0) {
 *   res[31:-1] = SE33(Rs1[31:(sa-1)]) + 1;
 *   Rd = SE32(res[31:0]);
 * } else {
 *   Rd = SE32(Rs1.W[0]);
 * }
 * ~~~
 *
 * \param [in]  a    int type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in long type
 */
#define __RV_SRAIW_U(a, b)    \
    ({    \
        /* Evaluate the argument before result is declared, so that a */    \
        /* caller expression naming its own result is not captured.   */    \
        int __a = (int)(a);    \
        long result;    \
        /* "K" makes b an immediate operand: it must be a constant (imm5u). */    \
        __ASM volatile("sraiw.u %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b));    \
        result;    \
    })
/* ===== Inline Function End for 4.37. SRAIW.u ===== */

/* ===== Inline Function Start for 4.38.1. SRL32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
 * \brief SRL32 (SIMD 32-bit Shift Right Logical)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * SRL32 Rd, Rs1, Rs2
 * SRL32.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit element logical right shift operations simultaneously. The shift amount is a
 * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
 * results.
 *
 * **Description**:\n
 * The 32-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
 * are filled with zero. The shift amount is specified by the low-order 5-bits of the value in the Rs2
 * register. For the rounding operation of the `.u` form, a value of 1 is added to the most significant
 * discarded bit of each 32-bit data element to calculate the final results. And the results are written to
 * Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = Rs2[4:0];
 * if (sa > 0) {
 *   if (`.u` form) { // SRL32.u
 *     res[31:-1] = ZE33(Rs1.W[x][31:sa-1]) + 1;
 *     Rd.W[x] = res[31:0];
 *   } else { // SRL32
 *     Rd.W[x] = ZE32(Rs1.W[x][31:sa])
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SRL32(unsigned long a, unsigned int b)
{
    /* Receives the destination register of the srl32 instruction. */
    unsigned long rd;

    /* %1 = a (packed 32-bit elements), %2 = b (shift amount register). */
    __ASM volatile("srl32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 4.38.1. SRL32 ===== */

/* ===== Inline Function Start for 4.38.2. SRL32.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
 * \brief SRL32.u (SIMD 32-bit Rounding Shift Right Logical)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * SRL32 Rd, Rs1, Rs2
 * SRL32.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit element logical right shift operations simultaneously. The shift amount is a
 * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
 * results.
 *
 * **Description**:\n
 * The 32-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
 * are filled with zero. The shift amount is specified by the low-order 5-bits of the value in the Rs2
 * register. For the rounding operation of the `.u` form, a value of 1 is added to the most significant
 * discarded bit of each 32-bit data element to calculate the final results. And the results are written to
 * Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = Rs2[4:0];
 * if (sa > 0) {
 *   if (`.u` form) { // SRL32.u
 *     res[31:-1] = ZE33(Rs1.W[x][31:sa-1]) + 1;
 *     Rd.W[x] = res[31:0];
 *   } else { // SRL32
 *     Rd.W[x] = ZE32(Rs1.W[x][31:sa])
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SRL32_U(unsigned long a, unsigned int b)
{
    /* Receives the destination register of the srl32.u instruction. */
    unsigned long rd;

    /* %1 = a (packed 32-bit elements), %2 = b (shift amount register). */
    __ASM volatile("srl32.u %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for 4.38.2. SRL32.u ===== */

/* ===== Inline Function Start for 4.39.1. SRLI32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
 * \brief SRLI32 (SIMD 32-bit Shift Right Logical Immediate)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * SRLI32 Rd, Rs1, imm5u
 * SRLI32.u Rd, Rs1, imm5u
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit elements logical right shift operations simultaneously. The shift amount is an
 * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
 *
 * **Description**:\n
 * The 32-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
 * are filled with zero. The shift amount is specified by the imm5u constant. For the rounding
 * operation of the `.u` form, a value of 1 is added to the most significant discarded bit of each 32-bit
 * data to calculate the final results. And the results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = imm5u[4:0];
 * if (sa > 0) {
 *   if (`.u` form) { // SRLI32.u
 *     res[31:-1] = ZE33(Rs1.W[x][31:sa-1]) + 1;
 *     Rd.W[x] = res[31:0];
 *   } else { // SRLI32
 *     Rd.W[x] = ZE32(Rs1.W[x][31:sa]);
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
#define __RV_SRLI32(a, b)    \
    ({    \
        /* Evaluate the argument before result is declared, so that a */    \
        /* caller expression naming its own result is not captured.   */    \
        unsigned long __a = (unsigned long)(a);    \
        unsigned long result;    \
        /* "K" makes b an immediate operand: it must be a constant (imm5u). */    \
        __ASM volatile("srli32 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b));    \
        result;    \
    })
/* ===== Inline Function End for 4.39.1. SRLI32 ===== */

/* ===== Inline Function Start for 4.39.2. SRLI32.u ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
 * \brief SRLI32.u (SIMD 32-bit Rounding Shift Right Logical Immediate)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * SRLI32 Rd, Rs1, imm5u
 * SRLI32.u Rd, Rs1, imm5u
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit elements logical right shift operations simultaneously. The shift amount is an
 * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
 *
 * **Description**:\n
 * The 32-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
 * are filled with zero. The shift amount is specified by the imm5u constant. For the rounding
 * operation of the `.u` form, a value of 1 is added to the most significant discarded bit of each 32-bit
 * data to calculate the final results. And the results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = imm5u[4:0];
 * if (sa > 0) {
 *   if (`.u` form) { // SRLI32.u
 *     res[31:-1] = ZE33(Rs1.W[x][31:sa-1]) + 1;
 *     Rd.W[x] = res[31:0];
 *   } else { // SRLI32
 *     Rd.W[x] = ZE32(Rs1.W[x][31:sa]);
 *   }
 * } else {
 *   Rd = Rs1;
 * }
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned int type of value stored in b
 * \return value stored in unsigned long type
 */
#define __RV_SRLI32_U(a, b)    \
    ({    \
        /* Capture `a` before `result` is declared: otherwise an argument */    \
        /* that mentions a caller variable named `result` would bind to */    \
        /* the uninitialized local below instead of the caller's value. */    \
        unsigned long __a = (unsigned long)(a);    \
        unsigned long result;    \
        /* `b` is bound through the "K" immediate constraint, so it must */    \
        /* be an integer constant (the imm5u shift amount). */    \
        __ASM volatile("srli32.u %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b));    \
        result;    \
    })
/* ===== Inline Function End for 4.39.2. SRLI32.u ===== */

/* ===== Inline Function Start for 4.40. STAS32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief STAS32 (SIMD 32-bit Straight Addition & Subtraction)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * STAS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit integer element addition and 32-bit integer element subtraction in a 64-bit
 * chunk simultaneously. Operands are from corresponding 32-bit elements.
 *
 * **Description**:\n
 * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit
 * integer element in [63:32] of Rs2, and writes the result to [63:32] of Rd; at the same time, it subtracts
 * the 32-bit integer element in [31:0] of Rs2 from the 32-bit integer element in [31:0] of Rs1, and
 * writes the result to [31:0] of Rd.
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned operations.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[1] = Rs1.W[1] + Rs2.W[1];
 * Rd.W[0] = Rs1.W[0] - Rs2.W[0];
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_STAS32(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* STAS32 Rd, Rs1, Rs2: a is Rs1, b is Rs2, rd receives Rd. */
    __ASM volatile("stas32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 4.40. STAS32 ===== */

/* ===== Inline Function Start for 4.41. STSA32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief STSA32 (SIMD 32-bit Straight Subtraction & Addition)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * STSA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit integer element subtraction and 32-bit integer element addition in a 64-bit
 * chunk simultaneously. Operands are from corresponding 32-bit elements.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit integer element in [63:32] of Rs2 from the 32-bit integer
 * element in [63:32] of Rs1, and writes the result to [63:32] of Rd; at the same time, it adds the 32-bit
 * integer element in [31:0] of Rs1 with the 32-bit integer element in [31:0] of Rs2, and writes the result
 * to [31:0] of Rd
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned operations.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[1] = Rs1.W[1] - Rs2.W[1];
 * Rd.W[0] = Rs1.W[0] + Rs2.W[0];
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_STSA32(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* STSA32 Rd, Rs1, Rs2: a is Rs1, b is Rs2, rd receives Rd. */
    __ASM volatile("stsa32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 4.41. STSA32 ===== */

/* ===== Inline Function Start for 4.42. SUB32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief SUB32 (SIMD 32-bit Subtraction)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * SUB32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit integer element subtractions simultaneously.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit integer elements in Rs2 from the 32-bit integer
 * elements in Rs1, and then writes the results to Rd.
 *
 * **Note**:\n
 * This instruction can be used for either signed or unsigned subtraction.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = Rs1.W[x] - Rs2.W[x];
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_SUB32(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* SUB32 Rd, Rs1, Rs2: a is Rs1, b is Rs2, rd receives Rd. */
    __ASM volatile("sub32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 4.42. SUB32 ===== */

/* ===== Inline Function Start for 4.43. UKADD32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief UKADD32 (SIMD 32-bit Unsigned Saturating Addition)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * UKADD32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit unsigned integer element saturating additions simultaneously.
 *
 * **Description**:\n
 * This instruction adds the 32-bit unsigned integer elements in Rs1 with the 32-bit
 * unsigned integer elements in Rs2. If any of the results are beyond the 32-bit unsigned number
 * range (0 <= RES <= 2^32-1), they are saturated to the range and the OV bit is set to 1. The saturated
 * results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rs1.W[x] + Rs2.W[x];
 * if (res[x] > (2^32)-1) {
 *   res[x] = (2^32)-1;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UKADD32(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* UKADD32 Rd, Rs1, Rs2: a is Rs1, b is Rs2, rd receives Rd. */
    __ASM volatile("ukadd32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 4.43. UKADD32 ===== */

/* ===== Inline Function Start for 4.44. UKCRAS32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief UKCRAS32 (SIMD 32-bit Unsigned Saturating Cross Addition & Subtraction)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * UKCRAS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do one 32-bit unsigned integer element saturating addition and one 32-bit unsigned
 * integer element saturating subtraction in a 64-bit chunk simultaneously. Operands are from crossed
 * 32-bit elements.
 *
 * **Description**:\n
 * This instruction adds the 32-bit unsigned integer element in [63:32] of Rs1 with the 32-
 * bit unsigned integer element in [31:0] of Rs2; at the same time, it subtracts the 32-bit unsigned
 * integer element in [63:32] of Rs2 from the 32-bit unsigned integer element in [31:0] Rs1. If any of the
 * results are beyond the 32-bit unsigned number range (0 <= RES <= 2^32-1), they are saturated to the
 * range and the OV bit is set to 1. The saturated results are written to [63:32] of Rd for addition and
 * [31:0] of Rd for subtraction.
 *
 * **Operations**:\n
 * ~~~
 * res1 = Rs1.W[1] + Rs2.W[0];
 * res2 = Rs1.W[0] - Rs2.W[1];
 * if (res1 > (2^32)-1) {
 *   res1 = (2^32)-1;
 *   OV = 1;
 * }
 * if (res2 < 0) {
 *   res2 = 0;
 *   OV = 1;
 * }
 * Rd.W[1] = res1;
 * Rd.W[0] = res2;
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UKCRAS32(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* UKCRAS32 Rd, Rs1, Rs2: a is Rs1, b is Rs2, rd receives Rd. */
    __ASM volatile("ukcras32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 4.44. UKCRAS32 ===== */

/* ===== Inline Function Start for 4.45. UKCRSA32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief UKCRSA32 (SIMD 32-bit Unsigned Saturating Cross Subtraction & Addition)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * UKCRSA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do one 32-bit unsigned integer element saturating subtraction and one 32-bit unsigned
 * integer element saturating addition in a 64-bit chunk simultaneously. Operands are from crossed
 * 32-bit elements.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit unsigned integer element in [31:0] of Rs2 from the
 * 32-bit unsigned integer element in [63:32] of Rs1; at the same time, it adds the 32-bit unsigned
 * integer element in [63:32] of Rs2 with the 32-bit unsigned integer element in [31:0] Rs1. If any of the
 * results are beyond the 32-bit unsigned number range (0 <= RES <= 2^32-1), they are saturated to the
 * range and the OV bit is set to 1. The saturated results are written to [63:32] of Rd for subtraction and
 * [31:0] of Rd for addition.
 *
 * **Operations**:\n
 * ~~~
 * res1 = Rs1.W[1] - Rs2.W[0];
 * res2 = Rs1.W[0] + Rs2.W[1];
 * if (res1 < 0) {
 *   res1 = 0;
 *   OV = 1;
 * }
 * if (res2 > (2^32)-1) {
 *   res2 = (2^32)-1;
 *   OV = 1;
 * }
 * Rd.W[1] = res1;
 * Rd.W[0] = res2;
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UKCRSA32(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* UKCRSA32 Rd, Rs1, Rs2: a is Rs1, b is Rs2, rd receives Rd. */
    __ASM volatile("ukcrsa32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 4.45. UKCRSA32 ===== */

/* ===== Inline Function Start for 4.46. UKSTAS32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief UKSTAS32 (SIMD 32-bit Unsigned Saturating Straight Addition & Subtraction)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * UKSTAS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do one 32-bit unsigned integer element saturating addition and one 32-bit unsigned
 * integer element saturating subtraction in a 64-bit chunk simultaneously. Operands are from
 * corresponding 32-bit elements.
 *
 * **Description**:\n
 * This instruction adds the 32-bit unsigned integer element in [63:32] of Rs1 with the 32-
 * bit unsigned integer element in [63:32] of Rs2; at the same time, it subtracts the 32-bit unsigned
 * integer element in [31:0] of Rs2 from the 32-bit unsigned integer element in [31:0] Rs1. If any of the
 * results are beyond the 32-bit unsigned number range (0 <= RES <= 2^32-1), they are saturated to the
 * range and the OV bit is set to 1. The saturated results are written to [63:32] of Rd for addition and
 * [31:0] of Rd for subtraction.
 *
 * **Operations**:\n
 * ~~~
 * res1 = Rs1.W[1] + Rs2.W[1];
 * res2 = Rs1.W[0] - Rs2.W[0];
 * if (res1 > (2^32)-1) {
 *   res1 = (2^32)-1;
 *   OV = 1;
 * }
 * if (res2 < 0) {
 *   res2 = 0;
 *   OV = 1;
 * }
 * Rd.W[1] = res1;
 * Rd.W[0] = res2;
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UKSTAS32(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* UKSTAS32 Rd, Rs1, Rs2: a is Rs1, b is Rs2, rd receives Rd. */
    __ASM volatile("ukstas32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 4.46. UKSTAS32 ===== */

/* ===== Inline Function Start for 4.47. UKSTSA32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief UKSTSA32 (SIMD 32-bit Unsigned Saturating Straight Subtraction & Addition)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * UKSTSA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do one 32-bit unsigned integer element saturating subtraction and one 32-bit unsigned
 * integer element saturating addition in a 64-bit chunk simultaneously. Operands are from
 * corresponding 32-bit elements.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit unsigned integer element in [63:32] of Rs2 from
 * the 32-bit unsigned integer element in [63:32] of Rs1; at the same time, it adds the 32-bit unsigned
 * integer element in [31:0] of Rs2 with the 32-bit unsigned integer element in [31:0] Rs1. If any of the
 * results are beyond the 32-bit unsigned number range (0 <= RES <= 2^32-1), they are saturated to the
 * range and the OV bit is set to 1. The saturated results are written to [63:32] of Rd for subtraction and
 * [31:0] of Rd for addition.
 *
 * **Operations**:\n
 * ~~~
 * res1 = Rs1.W[1] - Rs2.W[1];
 * res2 = Rs1.W[0] + Rs2.W[0];
 * if (res1 < 0) {
 *   res1 = 0;
 *   OV = 1;
 * }
 * if (res2 > (2^32)-1) {
 *   res2 = (2^32)-1;
 *   OV = 1;
 * }
 * Rd.W[1] = res1;
 * Rd.W[0] = res2;
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UKSTSA32(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* UKSTSA32 Rd, Rs1, Rs2: a is Rs1, b is Rs2, rd receives Rd. */
    __ASM volatile("ukstsa32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 4.47. UKSTSA32 ===== */

/* ===== Inline Function Start for 4.48. UKSUB32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief UKSUB32 (SIMD 32-bit Unsigned Saturating Subtraction)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * UKSUB32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit unsigned integer elements saturating subtractions simultaneously.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit unsigned integer elements in Rs2 from the 32-bit
 * unsigned integer elements in Rs1. If any of the results are beyond the 32-bit unsigned number
 * range (0 <= RES <= 2^32-1), they are saturated to the range and the OV bit is set to 1. The saturated
 * results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rs1.W[x] - Rs2.W[x];
 * if (res[x] < 0) {
 *   res[x] = 0;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UKSUB32(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* UKSUB32 Rd, Rs1, Rs2: a is Rs1, b is Rs2, rd receives Rd. */
    __ASM volatile("uksub32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 4.48. UKSUB32 ===== */

/* ===== Inline Function Start for 4.49. UMAX32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC
 * \brief UMAX32 (SIMD 32-bit Unsigned Maximum)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * UMAX32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit unsigned integer elements finding maximum operations simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 32-bit unsigned integer elements in Rs1 with the 32-bit
 * unsigned integer elements in Rs2 and selects, for each pair, the number that is greater. The
 * selected results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = (Rs1.W[x] >u Rs2.W[x])? Rs1.W[x] : Rs2.W[x];
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UMAX32(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* UMAX32 Rd, Rs1, Rs2: a is Rs1, b is Rs2, rd receives Rd. */
    __ASM volatile("umax32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 4.49. UMAX32 ===== */

/* ===== Inline Function Start for 4.50. UMIN32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC
 * \brief UMIN32 (SIMD 32-bit Unsigned Minimum)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * UMIN32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit unsigned integer elements finding minimum operations simultaneously.
 *
 * **Description**:\n
 * This instruction compares the 32-bit unsigned integer elements in Rs1 with the 32-bit
 * unsigned integer elements in Rs2 and selects, for each pair, the number that is smaller. The
 * selected results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = (Rs1.W[x] <u Rs2.W[x])? Rs1.W[x] : Rs2.W[x];
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_UMIN32(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* UMIN32 Rd, Rs1, Rs2: a is Rs1, b is Rs2, rd receives Rd. */
    __ASM volatile("umin32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 4.50. UMIN32 ===== */

/* ===== Inline Function Start for 4.51. URADD32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief URADD32 (SIMD 32-bit Unsigned Halving Addition)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * URADD32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit unsigned integer element additions simultaneously. The results are halved to
 * avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the 32-bit unsigned integer elements in Rs1 with the 32-bit
 * unsigned integer elements in Rs2. The results are first logically right-shifted by 1 bit and then
 * written to Rd.
 *
 * **Examples**:\n
 * ~~~
 * * Ra = 0x7FFFFFFF, Rb = 0x7FFFFFFF, Rt = 0x7FFFFFFF
 * * Ra = 0x80000000, Rb = 0x80000000, Rt = 0x80000000
 * * Ra = 0x40000000, Rb = 0x80000000, Rt = 0x60000000
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = (Rs1.W[x] + Rs2.W[x]) u>> 1;
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_URADD32(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* URADD32 Rd, Rs1, Rs2: a is Rs1, b is Rs2, rd receives Rd. */
    __ASM volatile("uradd32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 4.51. URADD32 ===== */

/* ===== Inline Function Start for 4.52. URCRAS32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief URCRAS32 (SIMD 32-bit Unsigned Halving Cross Addition & Subtraction)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * URCRAS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit unsigned integer element addition and 32-bit unsigned integer element
 * subtraction in a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements. The
 * results are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the 32-bit unsigned integer element in [63:32] of Rs1 with the 32-
 * bit unsigned integer element in [31:0] of Rs2, and subtracts the 32-bit unsigned integer element in
 * [63:32] of Rs2 from the 32-bit unsigned integer element in [31:0] of Rs1. The element results are first
 * logically right-shifted by 1 bit and then written to [63:32] of Rd for addition and [31:0] of Rd for
 * subtraction.
 *
 * **Examples**:\n
 * ~~~
 * Please see `URADD32` and `URSUB32` instructions.
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[1] = (Rs1.W[1] + Rs2.W[0]) u>> 1;
 * Rd.W[0] = (Rs1.W[0] - Rs2.W[1]) u>> 1;
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_URCRAS32(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* URCRAS32 Rd, Rs1, Rs2: a is Rs1, b is Rs2, rd receives Rd. */
    __ASM volatile("urcras32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 4.52. URCRAS32 ===== */

/* ===== Inline Function Start for 4.53. URCRSA32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief URCRSA32 (SIMD 32-bit Unsigned Halving Cross Subtraction & Addition)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * URCRSA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit unsigned integer element subtraction and 32-bit unsigned integer element
 * addition in a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements. The results
 * are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit unsigned integer element in [31:0] of Rs2 from the
 * 32-bit unsigned integer element in [63:32] of Rs1, and adds the 32-bit unsigned element integer in
 * [31:0] of Rs1 with the 32-bit unsigned integer element in [63:32] of Rs2. The two results are first
 * logically right-shifted by 1 bit and then written to [63:32] of Rd for subtraction and [31:0] of Rd for
 * addition.
 *
 * **Examples**:\n
 * ~~~
 * Please see `URADD32` and `URSUB32` instructions.
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[1] = (Rs1.W[1] - Rs2.W[0]) u>> 1;
 * Rd.W[0] = (Rs1.W[0] + Rs2.W[1]) u>> 1;
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_URCRSA32(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* URCRSA32 Rd, Rs1, Rs2: a is Rs1, b is Rs2, rd receives Rd. */
    __ASM volatile("urcrsa32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 4.53. URCRSA32 ===== */

/* ===== Inline Function Start for 4.54. URSTAS32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief URSTAS32 (SIMD 32-bit Unsigned Halving Straight Addition & Subtraction)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * URSTAS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit unsigned integer element addition and 32-bit unsigned integer element
 * subtraction in a 64-bit chunk simultaneously. Operands are from corresponding 32-bit elements.
 * The results are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the 32-bit unsigned integer element in [63:32] of Rs1 with the 32-
 * bit unsigned integer element in [63:32] of Rs2, and subtracts the 32-bit unsigned integer element in
 * [31:0] of Rs2 from the 32-bit unsigned integer element in [31:0] of Rs1. The element results are first
 * logically right-shifted by 1 bit and then written to [63:32] of Rd for addition and [31:0] of Rd for
 * subtraction.
 *
 * **Examples**:\n
 * ~~~
 * Please see `URADD32` and `URSUB32` instructions.
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[1] = (Rs1.W[1] + Rs2.W[1]) u>> 1;
 * Rd.W[0] = (Rs1.W[0] - Rs2.W[0]) u>> 1;
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_URSTAS32(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* URSTAS32 Rd, Rs1, Rs2: a is Rs1, b is Rs2, rd receives Rd. */
    __ASM volatile("urstas32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 4.54. URSTAS32 ===== */

/* ===== Inline Function Start for 4.55. URSTSA32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief URSTSA32 (SIMD 32-bit Unsigned Halving Straight Subtraction & Addition)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * URSTSA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit unsigned integer element subtraction and 32-bit unsigned integer element
 * addition in a 64-bit chunk simultaneously. Operands are from corresponding 32-bit elements. The
 * results are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit unsigned integer element in [63:32] of Rs2 from
 * the 32-bit unsigned integer element in [63:32] of Rs1, and adds the 32-bit unsigned element integer
 * in [31:0] of Rs1 with the 32-bit unsigned integer element in [31:0] of Rs2. The two results are first
 * logically right-shifted by 1 bit and then written to [63:32] of Rd for subtraction and [31:0] of Rd for
 * addition.
 *
 * **Examples**:\n
 * ~~~
 * Please see `URADD32` and `URSUB32` instructions.
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[1] = (Rs1.W[1] - Rs2.W[1]) u>> 1;
 * Rd.W[0] = (Rs1.W[0] + Rs2.W[0]) u>> 1;
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_URSTSA32(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* URSTSA32 Rd, Rs1, Rs2: a is Rs1, b is Rs2, rd receives Rd. */
    __ASM volatile("urstsa32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 4.55. URSTSA32 ===== */

/* ===== Inline Function Start for 4.56. URSUB32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
 * \brief URSUB32 (SIMD 32-bit Unsigned Halving Subtraction)
 * \details
 * **Type**: SIMD (RV64 Only)
 *
 * **Syntax**:\n
 * ~~~
 * URSUB32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit unsigned integer element subtractions simultaneously. The results are halved to
 * avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit unsigned integer elements in Rs2 from the 32-bit
 * unsigned integer elements in Rs1. The results are first logically right-shifted by 1 bit and then
 * written to Rd.
 *
 * **Examples**:\n
 * ~~~
 * * Ra = 0x7FFFFFFF, Rb = 0x80000000, Rt = 0xFFFFFFFF
 * * Ra = 0x80000000, Rb = 0x7FFFFFFF, Rt = 0x00000000
 * * Ra = 0x80000000, Rb = 0x40000000, Rt = 0x20000000
 * ~~~
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = (Rs1.W[x] - Rs2.W[x]) u>> 1;
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long type of value stored in a
 * \param [in]  b    unsigned long type of value stored in b
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_URSUB32(unsigned long a, unsigned long b)
{
    unsigned long rd;

    /* URSUB32 Rd, Rs1, Rs2: a is Rs1, b is Rs2, rd receives Rd. */
    __ASM volatile("ursub32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for 4.56. URSUB32 ===== */

#endif /* __RISCV_XLEN == 64 */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_NUCLEI_Default      Nuclei Default SIMD DSP Additional Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic
 * \brief    (RV32 & RV64)Nuclei Customized DSP Instructions
 * \details  This is Nuclei customized DSP instructions for both RV32 and RV64
 */

/* ===== Inline Function Start for EXPD80 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_Default
 * \brief EXPD80 (Expand and Copy Byte 0 to 32bit(when rv32) or 64bit(when rv64))
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * EXPD80 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * When rv32, Copy 8-bit data from 32-bit chunks into 4 bytes in a register.
 * When rv64, Copy 8-bit data from 64-bit chunks into 8 bytes in a register.
 *
 * **Description**:\n
 * Moves Rs1.B[0][7:0] to Rd.B[0][7:0], Rd.B[1][7:0], Rd.B[2][7:0], Rd.B[3][7:0]
 * (and, on RV64, also to Rd.B[4][7:0] through Rd.B[7][7:0]).
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:0] = CONCAT(Rs1.B[0][7:0], Rs1.B[0][7:0], Rs1.B[0][7:0], Rs1.B[0][7:0]);
 * for RV32: x=0
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_EXPD80(unsigned long a)
{
    unsigned long rd;

    /* EXPD80 Rd, Rs1: a is Rs1, rd receives Rd. */
    __ASM volatile("expd80 %0, %1"
                   : "=r"(rd)
                   : "r"(a));
    return rd;
}
/* ===== Inline Function End for EXPD80 ===== */

/* ===== Inline Function Start for EXPD81 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_Default
 * \brief EXPD81 (Expand and Copy Byte 1 to 32bit(rv32) or 64bit(when rv64))
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * EXPD81 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
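 * When rv32, Copy 8-bit data from 32-bit chunks into 4 bytes in a register.
 * When rv64, Copy 8-bit data from 64-bit chunks into 8 bytes in a register.
 *
 * **Description**:\n
 * Moves Rs1.B[1][7:0] to Rd.B[0][7:0], Rd.B[1][7:0], Rd.B[2][7:0], Rd.B[3][7:0]
 * (and, on RV64, also to Rd.B[4][7:0] through Rd.B[7][7:0]).
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:0] = CONCAT(Rs1.B[1][7:0], Rs1.B[1][7:0], Rs1.B[1][7:0], Rs1.B[1][7:0]);
 * for RV32: x=0
 * for RV64: x=1...0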
 * ~~~
 *
 * \param [in]  a unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_EXPD81(unsigned long a)
{
    unsigned long rd;

    /* EXPD81 Rd, Rs1: a is Rs1, rd receives Rd. */
    __ASM volatile("expd81 %0, %1"
                   : "=r"(rd)
                   : "r"(a));
    return rd;
}
/* ===== Inline Function End for EXPD81 ===== */

/* ===== Inline Function Start for EXPD82 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_Default
 * \brief EXPD82 (Expand and Copy Byte 2 to 32bit(rv32) or 64bit(when rv64))
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * EXPD82 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
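 * When rv32, Copy 8-bit data from 32-bit chunks into 4 bytes in a register.
 * When rv64, Copy 8-bit data from 64-bit chunks into 8 bytes in a register.
 *
 * **Description**:\n
 * Moves Rs1.B[2][7:0] to Rd.B[0][7:0], Rd.B[1][7:0], Rd.B[2][7:0], Rd.B[3][7:0]
 * (and, on RV64, also to Rd.B[4][7:0] through Rd.B[7][7:0]).
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:0] = CONCAT(Rs1.B[2][7:0], Rs1.B[2][7:0], Rs1.B[2][7:0], Rs1.B[2][7:0]);
 * for RV32: x=0
 * for RV64: x=1...0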
 * ~~~
 *
 * \param [in]  a unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_EXPD82(unsigned long a)
{
    unsigned long rd;

    /* EXPD82 Rd, Rs1: a is Rs1, rd receives Rd. */
    __ASM volatile("expd82 %0, %1"
                   : "=r"(rd)
                   : "r"(a));
    return rd;
}
/* ===== Inline Function End for EXPD82 ===== */

/* ===== Inline Function Start for EXPD83 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_Default
 * \brief EXPD83 (Expand and Copy Byte 3 to 32bit(rv32) or 64bit(when rv64))
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * EXPD83 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
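 * When rv32, Copy 8-bit data from 32-bit chunks into 4 bytes in a register.
 * When rv64, Copy 8-bit data from 64-bit chunks into 8 bytes in a register.
 *
 * **Description**:\n
 * Moves Rs1.B[3][7:0] to Rd.B[0][7:0], Rd.B[1][7:0], Rd.B[2][7:0], Rd.B[3][7:0]
 * (and, on RV64, also to Rd.B[4][7:0] through Rd.B[7][7:0]).
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:0] = CONCAT(Rs1.B[3][7:0], Rs1.B[3][7:0], Rs1.B[3][7:0], Rs1.B[3][7:0]);
 * for RV32: x=0
 * for RV64: x=1...0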
 * ~~~
 *
 * \param [in]  a unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_EXPD83(unsigned long a)
{
    unsigned long rd;

    /* EXPD83 Rd, Rs1: a is Rs1, rd receives Rd. */
    __ASM volatile("expd83 %0, %1"
                   : "=r"(rd)
                   : "r"(a));
    return rd;
}
/* ===== Inline Function End for EXPD83 ===== */

#if (__RISCV_XLEN == 64)
/* ===== Inline Function Start for EXPD84 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_Default
 * \brief EXPD84 (Expand and Copy Byte 4 to 64bit)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * EXPD84 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Only RV64, copy 8-bit data from 64-bit chunks into 8 bytes in a register.
 *
 * **Description**:\n
 * Moves Rs1.B[4][7:0] to Rd.B[0][7:0], Rd.B[1][7:0], Rd.B[2][7:0], Rd.B[3][7:0],
 * Rd.B[4][7:0], Rd.B[5][7:0], Rd.B[6][7:0], Rd.B[7][7:0]
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:0] = CONCAT(Rs1.B[4][7:0], Rs1.B[4][7:0], Rs1.B[4][7:0], Rs1.B[4][7:0]);
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_EXPD84(unsigned long a)
{
    unsigned long rd;

    /* EXPD84 Rd, Rs1: a is Rs1, rd receives Rd. */
    __ASM volatile("expd84 %0, %1"
                   : "=r"(rd)
                   : "r"(a));
    return rd;
}
/* ===== Inline Function End for EXPD84 ===== */

/* ===== Inline Function Start for EXPD85 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_Default
 * \brief EXPD85 (Expand and Copy Byte 5 to 64bit)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * EXPD85 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Only RV64, copy 8-bit data from 64-bit chunks into 8 bytes in a register.
 *
 * **Description**:\n
 * Moves Rs1.B[5][7:0] to Rd.B[0][7:0], Rd.B[1][7:0], Rd.B[2][7:0], Rd.B[3][7:0],
 * Rd.B[4][7:0], Rd.B[5][7:0], Rd.B[6][7:0], Rd.B[7][7:0]
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:0] = CONCAT(Rs1.B[5][7:0], Rs1.B[5][7:0], Rs1.B[5][7:0], Rs1.B[5][7:0]);
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_EXPD85(unsigned long a)
{
    unsigned long rd;

    /* EXPD85 Rd, Rs1: a is Rs1, rd receives Rd. */
    __ASM volatile("expd85 %0, %1"
                   : "=r"(rd)
                   : "r"(a));
    return rd;
}
/* ===== Inline Function End for EXPD85 ===== */

/* ===== Inline Function Start for EXPD86 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_Default
 * \brief EXPD86 (Expand and Copy Byte 6 to 64bit)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * EXPD86 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Only RV64, copy 8-bit data from 64-bit chunks into 8 bytes in a register.
 *
 * **Description**:\n
 * Moves Rs1.B[6][7:0] to Rd.B[0][7:0], Rd.B[1][7:0], Rd.B[2][7:0], Rd.B[3][7:0],
 * Rd.B[4][7:0], Rd.B[5][7:0], Rd.B[6][7:0], Rd.B[7][7:0]
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:0] = CONCAT(Rs1.B[6][7:0], Rs1.B[6][7:0], Rs1.B[6][7:0], Rs1.B[6][7:0]);
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_EXPD86(unsigned long a)
{
    unsigned long rd;

    /* EXPD86 Rd, Rs1: a is Rs1, rd receives Rd. */
    __ASM volatile("expd86 %0, %1"
                   : "=r"(rd)
                   : "r"(a));
    return rd;
}
/* ===== Inline Function End for EXPD86 ===== */

/* ===== Inline Function Start for EXPD87 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_Default
 * \brief EXPD87 (Expand and Copy Byte 7 to 64bit)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * EXPD87 Rd, Rs1
 * ~~~
 *
 * **Purpose**:\n
 * Only RV64, copy 8-bit data from 64-bit chunks into 8 bytes in a register.
 *
 * **Description**:\n
 * Moves Rs1.B[7][7:0] to Rd.B[0][7:0], Rd.B[1][7:0], Rd.B[2][7:0], Rd.B[3][7:0],
 * Rd.B[4][7:0], Rd.B[5][7:0], Rd.B[6][7:0], Rd.B[7][7:0]
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:0] = CONCAT(Rs1.B[7][7:0], Rs1.B[7][7:0], Rs1.B[7][7:0], Rs1.B[7][7:0]);
 * for RV64: x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_EXPD87(unsigned long a)
{
    unsigned long rd;

    /* EXPD87 Rd, Rs1: a is Rs1, rd receives Rd. */
    __ASM volatile("expd87 %0, %1"
                   : "=r"(rd)
                   : "r"(a));
    return rd;
}
/* ===== Inline Function End for EXPD87 ===== */
#endif /* __RISCV_XLEN == 64 */

#if (__RISCV_XLEN == 32) || defined(__ONLY_FOR_DOXYGEN_DOCUMENT_GENERATION__)
/* XXXXX Nuclei Extended DSP Instructions for RV32 XXXXX */

/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N1      Nuclei N1 SIMD DSP Additional Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic
 * \brief    (RV32 only) Nuclei Customized N1 DSP Instructions
 * \details  These are Nuclei customized N1 DSP instructions, available only on RV32
 */
/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2      Nuclei N2 SIMD DSP Additional Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic
 * \brief    (RV32 only) Nuclei Customized N2 DSP Instructions
 * \details  These are Nuclei customized N2 DSP instructions, available only on RV32
 */
/**
 * \defgroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3      Nuclei N3 SIMD DSP Additional Instructions
 * \ingroup  NMSIS_Core_DSP_Intrinsic
 * \brief    (RV32 only) Nuclei Customized N3 DSP Instructions
 * \details  These are Nuclei customized N3 DSP instructions, available only on RV32
 */

/* ===== Inline Function Start for DKHM8 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N1
 * \brief DKHM8 (64-bit SIMD Signed Saturating Q7 Multiply)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKHM8 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do Q7xQ7 element multiplications simultaneously. The Q14 results are then reduced to Q7
 * numbers again.
 *
 * **Description**:\n
 * For the `DKHM8` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1
 * with the top 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7
 * content of 16-bit chunks in Rs1 with the bottom 8-bit Q7 content of 16-bit chunks in Rs2.
 *
 * The Q14 results are then right-shifted 7-bits and saturated into Q7 values. The Q7 results are then
 * written into Rd. When both the two Q7 inputs of a multiplication are 0x80, saturation will happen.
 * The result will be saturated to 0x7F and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.B[x+1]; op2t = Rs2.B[x+1]; // top
 * op1b = Rs1.B[x]; op2b = Rs2.B[x]; // bottom
 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
 *   if (0x80 != aop | 0x80 != bop) {
 *     res = (aop s* bop) >> 7;
 *   } else {
 *     res = 0x7F;
 *     OV = 1;
 *   }
 * }
 * Rd.H[x/2] = concat(rest, resb);
 * for RV32: x=0,2,4,6
 * ~~~
 *
 * \param [in]  a  unsigned long long value passed as Rs1 (eight packed Q7 elements)
 * \param [in]  b  unsigned long long value passed as Rs2 (eight packed Q7 elements)
 * \return unsigned long long value read back from Rd (eight packed Q7 products)
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKHM8(unsigned long long a, unsigned long long b)
{
    unsigned long long result;
    __ASM volatile("dkhm8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
    return result;
}
/* ===== Inline Function End for DKHM8 ===== */

/* ===== Inline Function Start for DKHM16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N1
 * \brief DKHM16 (64-bit SIMD Signed Saturating Q15 Multiply)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKHM16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do Q15xQ15 element multiplications simultaneously. The Q30 results are then reduced to
 * Q15 numbers again.
 *
 * **Description**:\n
 * For the `DKHM16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in
 * Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom
 * 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content of 32-bit chunks in
 * Rs2.
 *
 * The Q30 results are then right-shifted 15-bits and saturated into Q15 values. The Q15 results are
 * then written into Rd. When both the two Q15 inputs of a multiplication are 0x8000, saturation will
 * happen. The result will be saturated to 0x7FFF and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.H[x+1]; op2t = Rs2.H[x+1]; // top
 * op1b = Rs1.H[x]; op2b = Rs2.H[x]; // bottom
 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
 *   if (0x8000 != aop | 0x8000 != bop) {
 *     res = (aop s* bop) >> 15;
 *   } else {
 *     res = 0x7FFF;
 *     OV = 1;
 *   }
 * }
 * Rd.W[x/2] = concat(rest, resb);
 * for RV32: x=0,2
 * ~~~
 *
 * \param [in]  a  unsigned long long value passed as Rs1 (four packed Q15 elements)
 * \param [in]  b  unsigned long long value passed as Rs2 (four packed Q15 elements)
 * \return unsigned long long value read back from Rd (four packed Q15 products)
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKHM16(unsigned long long a, unsigned long long b)
{
    unsigned long long result;
    __ASM volatile("dkhm16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
    return result;
}
/* ===== Inline Function End for DKHM16 ===== */

/* ===== Inline Function Start for DKABS8 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N1
 * \brief DKABS8 (64-bit SIMD 8-bit Saturating Absolute)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKABS8 Rd, Rs1
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Get the absolute value of 8-bit signed integer elements simultaneously.
 *
 * **Description**:\n
 * This instruction calculates the absolute value of 8-bit signed integer elements stored
 * in Rs1 and writes the element results to Rd. If the input number is 0x80, this instruction generates
 * 0x7f as the output and sets the OV bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * src = Rs1.B[x];
 * if (src == 0x80) {
 *   src = 0x7f;
 *   OV = 1;
 * } else if (src[7] == 1) {
 *   src = -src;
 * }
 * Rd.B[x] = src;
 * for RV32: x=7...0
 * ~~~
 *
 * \param [in]  a  unsigned long long value passed as Rs1 (eight packed signed 8-bit elements)
 * \return unsigned long long value read back from Rd (eight packed 8-bit absolute values)
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKABS8(unsigned long long a)
{
    unsigned long long result;
    __ASM volatile("dkabs8 %0, %1" : "=r"(result) : "r"(a));
    return result;
}
/* ===== Inline Function End for DKABS8 ===== */

/* ===== Inline Function Start for DKABS16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N1
 * \brief DKABS16 (64-bit SIMD 16-bit Saturating Absolute)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKABS16 Rd, Rs1
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Get the absolute value of 16-bit signed integer elements simultaneously.
 *
 * **Description**:\n
 * This instruction calculates the absolute value of 16-bit signed integer elements stored
 * in Rs1 and writes the element results to Rd. If the input number is 0x8000, this instruction
 * generates 0x7fff as the output and sets the OV bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * src = Rs1.H[x];
 * if (src == 0x8000) {
 *   src = 0x7fff;
 *   OV = 1;
 * } else if (src[15] == 1) {
 *   src = -src;
 * }
 * Rd.H[x] = src;
 * for RV32: x=3...0
 * ~~~
 *
 * \param [in]  a  unsigned long long value passed as Rs1 (four packed signed 16-bit elements)
 * \return unsigned long long value read back from Rd (four packed 16-bit absolute values)
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKABS16(unsigned long long a)
{
    unsigned long long result;
    __ASM volatile("dkabs16 %0, %1" : "=r"(result) : "r"(a));
    return result;
}
/* ===== Inline Function End for DKABS16 ===== */

/* ===== Inline Function Start for DKSLRA8 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N1
 * \brief DKSLRA8 (64-bit SIMD 8-bit Shift Left Logical with Saturation or Shift Right Arithmetic)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKSLRA8 Rd, Rs1, Rs2
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit elements logical left (positive) or arithmetic right (negative) shift operation with
 * Q7 saturation for the left shift.
 *
 * **Description**:\n
 * The 8-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
 * based on the value of Rs2[3:0]. Rs2[3:0] is in the signed range of [-2^3, 2^3-1]. A positive Rs2[3:0] means
 * logical left shift and a negative Rs2[3:0] means arithmetic right shift. The shift amount is the
 * absolute value of Rs2[3:0]. However, the behavior of `Rs2[3:0]==-2^3 (0x8)` is defined to be
 * equivalent to the behavior of `Rs2[3:0]==-(2^3-1) (0x9)`.
 * The left-shifted results are saturated to the 8-bit signed integer range of [-2^7, 2^7-1].
 * If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:4] will not affect
 * this instruction.
 *
 * **Operations**:\n
 * ~~~
 * if (Rs2[3:0] < 0) {
 *   sa = -Rs2[3:0];
 *   sa = (sa == 8)? 7 : sa;
 *   Rd.B[x] = SE8(Rs1.B[x][7:sa]);
 * } else {
 *   sa = Rs2[2:0];
 *   res[(7+sa):0] = Rs1.B[x] <<(logic) sa;
 *   if (res > (2^7)-1) {
 *     res[7:0] = 0x7f; OV = 1;
 *   } else if (res < -2^7) {
 *     res[7:0] = 0x80; OV = 1;
 *   }
 *   Rd.B[x] = res[7:0];
 * }
 * for RV32: x=7...0
 * ~~~
 *
 * \param [in]  a  unsigned long long value passed as Rs1 (eight packed 8-bit elements to shift)
 * \param [in]  b  int value passed as Rs2; only bits [3:0] are used, as a signed shift amount
 *                 (positive: left shift with saturation, negative: arithmetic right shift)
 * \return unsigned long long value read back from Rd (eight packed 8-bit shifted results)
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKSLRA8(unsigned long long a, int b)
{
    unsigned long long result;
    __ASM volatile("dkslra8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
    return result;
}
/* ===== Inline Function End for DKSLRA8 ===== */

/* ===== Inline Function Start for DKSLRA16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N1
 * \brief DKSLRA16 (64-bit SIMD 16-bit Shift Left Logical with Saturation or Shift Right Arithmetic)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKSLRA16 Rd, Rs1, Rs2
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit elements logical left (positive) or arithmetic right (negative) shift operation with
 * Q15 saturation for the left shift.
 *
 * **Description**:\n
 * The 16-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
 * based on the value of Rs2[4:0]. Rs2[4:0] is in the signed range of [-2^4, 2^4-1]. A positive Rs2[4:0] means
 * logical left shift and a negative Rs2[4:0] means arithmetic right shift. The shift amount is the
 * absolute value of Rs2[4:0]. However, the behavior of `Rs2[4:0]==-2^4 (0x10)` is defined to be
 * equivalent to the behavior of `Rs2[4:0]==-(2^4-1) (0x11)`.
 * The left-shifted results are saturated to the 16-bit signed integer range of [-2^15, 2^15-1].
 * After the shift, saturation, or rounding, the final results are written to
 * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:5] will not affect
 * this instruction.
 *
 * **Operations**:\n
 * ~~~
 * if (Rs2[4:0] < 0) {
 *   sa = -Rs2[4:0];
 *   sa = (sa == 16)? 15 : sa;
 *   Rd.H[x] = SE16(Rs1.H[x][15:sa]);
 * } else {
 *   sa = Rs2[3:0];
 *   res[(15+sa):0] = Rs1.H[x] <<(logic) sa;
 *   if (res > (2^15)-1) {
 *     res[15:0] = 0x7fff; OV = 1;
 *   } else if (res < -2^15) {
 *     res[15:0] = 0x8000; OV = 1;
 *   }
 *   Rd.H[x] = res[15:0];
 * }
 * for RV32: x=3...0
 * ~~~
 *
 * \param [in]  a  unsigned long long value passed as Rs1 (four packed 16-bit elements to shift)
 * \param [in]  b  int value passed as Rs2; only bits [4:0] are used, as a signed shift amount
 *                 (positive: left shift with saturation, negative: arithmetic right shift)
 * \return unsigned long long value read back from Rd (four packed 16-bit shifted results)
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKSLRA16(unsigned long long a, int b)
{
    unsigned long long result;
    __ASM volatile("dkslra16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
    return result;
}
/* ===== Inline Function End for DKSLRA16 ===== */

/* ===== Inline Function Start for DKADD8 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N1
 * \brief DKADD8 (64-bit SIMD 8-bit Signed Saturating Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKADD8 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit signed integer element saturating additions simultaneously.
 *
 * **Description**:\n
 * This instruction adds the 8-bit signed integer elements in Rs1 with the 8-bit signed
 * integer elements in Rs2. If any of the results are beyond the Q7 number range (-2^7 <= Q7 <= 2^7-1), they
 * are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rs1.B[x] + Rs2.B[x];
 * if (res[x] > 127) {
 *   res[x] = 127;
 *   OV = 1;
 * } else if (res[x] < -128) {
 *   res[x] = -128;
 *   OV = 1;
 * }
 * Rd.B[x] = res[x];
 * for RV32: x=7...0
 * ~~~
 *
 * \param [in]  a  unsigned long long value passed as Rs1 (eight packed signed 8-bit addends)
 * \param [in]  b  unsigned long long value passed as Rs2 (eight packed signed 8-bit addends)
 * \return unsigned long long value read back from Rd (eight packed saturated 8-bit sums)
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKADD8(unsigned long long a, unsigned long long b)
{
    unsigned long long result;
    __ASM volatile("dkadd8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
    return result;
}
/* ===== Inline Function End for DKADD8 ===== */

/* ===== Inline Function Start for DKADD16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N1
 * \brief DKADD16 (64-bit SIMD 16-bit Signed Saturating Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKADD16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element saturating additions simultaneously.
 *
 * **Description**:\n
 * This instruction adds the 16-bit signed integer elements in Rs1 with the 16-bit signed
 * integer elements in Rs2. If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= 2^15-1),
 * they are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rs1.H[x] + Rs2.H[x];
 * if (res[x] > 32767) {
 *   res[x] = 32767;
 *   OV = 1;
 * } else if (res[x] < -32768) {
 *   res[x] = -32768;
 *   OV = 1;
 * }
 * Rd.H[x] = res[x];
 * for RV32: x=3...0
 * ~~~
 *
 * \param [in]  a  unsigned long long value passed as Rs1 (four packed signed 16-bit addends)
 * \param [in]  b  unsigned long long value passed as Rs2 (four packed signed 16-bit addends)
 * \return unsigned long long value read back from Rd (four packed saturated 16-bit sums)
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKADD16(unsigned long long a, unsigned long long b)
{
    unsigned long long result;
    __ASM volatile("dkadd16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
    return result;
}
/* ===== Inline Function End for DKADD16 ===== */

/* ===== Inline Function Start for DKSUB8 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N1
 * \brief DKSUB8 (64-bit SIMD 8-bit Signed Saturating Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKSUB8 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 8-bit signed elements saturating subtractions simultaneously.
 *
 * **Description**:\n
 * This instruction subtracts the 8-bit signed integer elements in Rs2 from the 8-bit
 * signed integer elements in Rs1. If any of the results are beyond the Q7 number range (-2^7 <= Q7 <= 2^7-1),
 * they are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rs1.B[x] - Rs2.B[x];
 * if (res[x] > (2^7)-1) {
 *   res[x] = (2^7)-1;
 *   OV = 1;
 * } else if (res[x] < -2^7) {
 *   res[x] = -2^7;
 *   OV = 1;
 * }
 * Rd.B[x] = res[x];
 * for RV32: x=7...0
 * ~~~
 *
 * \param [in]  a  unsigned long long value passed as Rs1 (eight packed signed 8-bit minuends)
 * \param [in]  b  unsigned long long value passed as Rs2 (eight packed signed 8-bit subtrahends)
 * \return unsigned long long value read back from Rd (eight packed saturated 8-bit differences)
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKSUB8(unsigned long long a, unsigned long long b)
{
    unsigned long long result;
    __ASM volatile("dksub8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
    return result;
}
/* ===== Inline Function End for DKSUB8 ===== */

/* ===== Inline Function Start for DKSUB16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N1
 * \brief DKSUB16 (64-bit SIMD 16-bit Signed Saturating Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKSUB16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer elements saturating subtractions simultaneously.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit signed integer elements in Rs2 from the 16-bit
 * signed integer elements in Rs1. If any of the results are beyond the Q15 number range (-2^15 <= Q15 <=
 * 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to
 * Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rs1.H[x] - Rs2.H[x];
 * if (res[x] > (2^15)-1) {
 *   res[x] = (2^15)-1;
 *   OV = 1;
 * } else if (res[x] < -2^15) {
 *   res[x] = -2^15;
 *   OV = 1;
 * }
 * Rd.H[x] = res[x];
 * for RV32: x=3...0
 * ~~~
 *
 * \param [in]  a  unsigned long long value passed as Rs1 (four packed signed 16-bit minuends)
 * \param [in]  b  unsigned long long value passed as Rs2 (four packed signed 16-bit subtrahends)
 * \return unsigned long long value read back from Rd (four packed saturated 16-bit differences)
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKSUB16(unsigned long long a, unsigned long long b)
{
    unsigned long long result;
    __ASM volatile("dksub16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
    return result;
}
/* ===== Inline Function End for DKSUB16 ===== */

/* ===== Inline Function Start for DKHMX8 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief DKHMX8 (64-bit SIMD Signed Crossed Saturating Q7 Multiply)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKHMX8 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do Q7xQ7 element crossed multiplications simultaneously. The Q14 results are then reduced to Q7 numbers again.
 *
 * **Description**:\n
 * For the `DKHMX8` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1 with the
 * bottom 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7
 * content of 16-bit chunks in Rs1 with the top 8-bit Q7 content of 16-bit chunks in Rs2.
 *
 * The Q14 results are then right-shifted 7-bits and saturated into Q7 values. The Q7 results are then
 * written into Rd. When both the two Q7 inputs of a multiplication are 0x80, saturation will happen.
 * The result will be saturated to 0x7F and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.B[x+1]; op2t = Rs2.B[x]; // top
 * op1b = Rs1.B[x]; op2b = Rs2.B[x+1]; // bottom
 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
 *   if (0x80 != aop | 0x80 != bop) {
 *     res = (aop s* bop) >> 7;
 *   } else {
 *     res = 0x7F;
 *     OV = 1;
 *   }
 * }
 * Rd.H[x/2] = concat(rest, resb);
 * for RV32: x=0,2,4,6
 * ~~~
 *
 * \param [in]  a  unsigned long long value passed as Rs1 (eight packed Q7 elements)
 * \param [in]  b  unsigned long long value passed as Rs2 (eight packed Q7 elements)
 * \return unsigned long long value read back from Rd (eight packed Q7 crossed products)
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKHMX8(unsigned long long a, unsigned long long b)
{
    unsigned long long result;
    __ASM volatile("dkhmx8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
    return result;
}
/* ===== Inline Function End for DKHMX8 ===== */

/* ===== Inline Function Start for DKHMX16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief DKHMX16 (64-bit SIMD Signed Crossed Saturating Q15 Multiply)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKHMX16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do Q15xQ15 element crossed multiplications simultaneously. The Q30 results are then reduced to Q15 numbers again.
 *
 * **Description**:\n
 * For the `DKHMX16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the
 * bottom 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom 16-bit Q15
 * content of 32-bit chunks in Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2.
 *
 * The Q30 results are then right-shifted 15-bits and saturated into Q15 values. The Q15 results are
 * then written into Rd. When both the two Q15 inputs of a multiplication are 0x8000, saturation will
 * happen. The result will be saturated to 0x7FFF and the overflow flag OV will be set.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.H[x+1]; op2t = Rs2.H[x]; // top
 * op1b = Rs1.H[x]; op2b = Rs2.H[x+1]; // bottom
 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
 *   if (0x8000 != aop | 0x8000 != bop) {
 *     res = (aop s* bop) >> 15;
 *   } else {
 *     res = 0x7FFF;
 *     OV = 1;
 *   }
 * }
 * Rd.W[x/2] = concat(rest, resb);
 * for RV32: x=0,2
 * ~~~
 *
 * \param [in]  a  unsigned long long value passed as Rs1 (four packed Q15 elements)
 * \param [in]  b  unsigned long long value passed as Rs2 (four packed Q15 elements)
 * \return unsigned long long value read back from Rd (four packed Q15 crossed products)
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKHMX16(unsigned long long a, unsigned long long b)
{
    unsigned long long result;
    __ASM volatile("dkhmx16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
    return result;
}
/* ===== Inline Function End for DKHMX16 ===== */

/* ===== Inline Function Start for DSMMUL ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief DSMMUL (64-bit MSW 32x32 Signed Multiply)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSMMUL Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do MSW 32x32 element signed multiplications simultaneously. The results are written into Rd.
 *
 * **Description**:\n
 * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2 and writes the
 * most significant 32-bit multiplication results to the corresponding 32-bit elements of Rd. The 32-bit
 * elements of Rs1 and Rs2 are treated as signed integers. The .u form of the instruction rounds up
 * the most significant 32-bit of the 64-bit multiplication results by adding a 1 to bit 31 of the results.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
 *   res = (aop s* bop)[63:32];
 * }
 * Rd = concat(rest, resb);
 * x=0
 * ~~~
 *
 * \param [in]  a  unsigned long long value passed as Rs1 (two packed signed 32-bit elements)
 * \param [in]  b  unsigned long long value passed as Rs2 (two packed signed 32-bit elements)
 * \return unsigned long long value read back from Rd (two packed 32-bit high-word products)
 */
__STATIC_FORCEINLINE unsigned long long __RV_DSMMUL(unsigned long long a, unsigned long long b)
{
    unsigned long long result;
    __ASM volatile("dsmmul %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
    return result;
}
/* ===== Inline Function End for DSMMUL ===== */

/* ===== Inline Function Start for DSMMUL.u ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief DSMMUL.u (64-bit MSW 32x32 Unsigned Multiply)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSMMUL.u Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do MSW 32x32 element unsigned multiplications simultaneously. The results are written into Rd.
 *
 * **Description**:\n
 * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2 and writes the
 * most significant 32-bit multiplication results to the corresponding 32-bit elements of Rd. The 32-bit
 * elements of Rs1 and Rs2 are treated as unsigned integers. The .u form of the instruction rounds up
 * the most significant 32-bit of the 64-bit multiplication results by adding a 1 to bit 31 of the results.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
 *   res = RUND(aop u* bop)[63:32];
 * }
 * Rd = concat(rest, resb);
 * x=0
 * ~~~
 *
 * \note NOTE(review): this text calls the operands unsigned, yet it also states that the `.u`
 * suffix selects rounding. Whether the multiply is really unsigned (rather than a signed
 * multiply with rounding) should be confirmed against the Nuclei DSP specification.
 *
 * \param [in]  a  unsigned long long value passed as Rs1 (two packed 32-bit elements)
 * \param [in]  b  unsigned long long value passed as Rs2 (two packed 32-bit elements)
 * \return unsigned long long value read back from Rd (two packed 32-bit rounded high-word products)
 */
__STATIC_FORCEINLINE unsigned long long __RV_DSMMUL_U(unsigned long long a, unsigned long long b)
{
    unsigned long long result;
    __ASM volatile("dsmmul.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
    return result;
}
/* ===== Inline Function End for DSMMUL.u ===== */

/* ===== Inline Function Start for DKWMMUL ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief DKWMMUL (64-bit MSW 32x32 Signed Multiply & Double)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKWMMUL Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do MSW 32x32 element signed multiplications simultaneously and double. The results are written into Rd.
 *
 * **Description**:\n
 * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2. It then shifts
 * the multiplication results one bit to the left and takes the most significant 32-bit results. If the
 * shifted result is greater than 2^31-1, it is saturated to 2^31-1 and the OV flag is set to 1. The final element
 * result is written to Rd. The 32-bit elements of Rs1 and Rs2 are treated as signed integers. The .u
 * form of the instruction additionally rounds up the 64-bit multiplication results by adding a 1 to bit
 * 30 before the shift and saturation operations.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
 *   res = sat.q31((aop s* bop) << 1)[63:32];
 * }
 * Rd = concat(rest, resb);
 * x=0
 * ~~~
 *
 * \param [in]  a  unsigned long long value passed as Rs1 (two packed signed 32-bit elements)
 * \param [in]  b  unsigned long long value passed as Rs2 (two packed signed 32-bit elements)
 * \return unsigned long long value read back from Rd (two packed saturated 32-bit results)
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKWMMUL(unsigned long long a, unsigned long long b)
{
    unsigned long long result;
    __ASM volatile("dkwmmul %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
    return result;
}
/* ===== Inline Function End for DKWMMUL ===== */

/* ===== Inline Function Start for DKWMMUL.u ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief DKWMMUL.u (64-bit MSW 32x32 Unsigned Multiply & Double)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKWMMUL.u Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do MSW 32x32 element unsigned multiplications simultaneously and double. The results are written into Rd.
 *
 * **Description**:\n
 * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2. It then shifts
 * the multiplication results one bit to the left and takes the most significant 32-bit results. If the
 * shifted result is greater than 2^31-1, it is saturated to 2^31-1 and the OV flag is set to 1. The final element
 * result is written to Rd. The 32-bit elements of Rs1 and Rs2 are treated as signed integers. The .u
 * form of the instruction additionally rounds up the 64-bit multiplication results by adding a 1 to bit
 * 30 before the shift and saturation operations.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
 *   res = sat.q31(RUND(aop u* bop) << 1)[63:32];
 * }
 * Rd = concat(rest, resb);
 * x=0
 * ~~~
 *
 * \note NOTE(review): the brief, purpose and pseudo-code (`u*`) describe an unsigned multiply,
 * while the description says the elements are treated as signed integers and that `.u` only
 * adds rounding. Confirm the intended signedness against the Nuclei DSP specification.
 *
 * \param [in]  a  unsigned long long value passed as Rs1 (two packed 32-bit elements)
 * \param [in]  b  unsigned long long value passed as Rs2 (two packed 32-bit elements)
 * \return unsigned long long value read back from Rd (two packed rounded, saturated 32-bit results)
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKWMMUL_U(unsigned long long a, unsigned long long b)
{
    unsigned long long result;
    __ASM volatile("dkwmmul.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
    return result;
}
/* ===== Inline Function End for DKWMMUL.u ===== */

/* ===== Inline Function Start for DKABS32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief DKABS32 (64-bit SIMD 32-bit Saturating Absolute)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKABS32 Rd, Rs1
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Get the absolute value of 32-bit signed integer elements simultaneously.
 *
 * **Description**:\n
 * This instruction calculates the absolute value of 32-bit signed integer elements stored in Rs1 and writes the element
 * results to Rd. If the input number is 0x8000_0000, this instruction generates 0x7fff_ffff as the output and sets the OV
 * bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * src = Rs1.W[x];
 * if (src == 0x8000_0000) {
 *   src = 0x7fff_ffff;
 *   OV = 1;
 * } else if (src[31] == 1) {
 *   src = -src;
 * }
 * Rd.W[x] = src;
 * x=1...0
 * ~~~
 *
 * \param [in]  a  unsigned long long value passed as Rs1 (two packed signed 32-bit elements)
 * \return unsigned long long value read back from Rd (two packed 32-bit absolute values)
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKABS32(unsigned long long a)
{
    unsigned long long result;
    __ASM volatile("dkabs32 %0, %1" : "=r"(result) : "r"(a));
    return result;
}
/* ===== Inline Function End for DKABS32 ===== */

/* ===== Inline Function Start for DKSLRA32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief DKSLRA32 (64-bit SIMD 32-bit Shift Left Logical with Saturation or Shift Right Arithmetic)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKSLRA32 Rd, Rs1, Rs2
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit elements logical left (positive) or arithmetic right (negative) shift operation with Q31 saturation for the left shift.
 *
 * **Description**:\n
 * The 32-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically based on the value of Rs2[5:0].
 * Rs2[5:0] is in the signed range of [-2^5, 2^5-1]. A positive Rs2[5:0] means logical left shift and a negative Rs2[5:0]
 * means arithmetic right shift. The shift amount is the absolute value of Rs2[5:0]. However, the behavior of
 * `Rs2[5:0]==-2^5 (0x20)` is defined to be equivalent to the behavior of `Rs2[5:0]==-(2^5-1) (0x21)`.
 *
 * **Operations**:\n
 * ~~~
 * if (Rs2[5:0] < 0) {
 *   sa = -Rs2[5:0];
 *   sa = (sa == 32)? 31 : sa;
 *   Rd.W[x] = SE32(Rs1.W[x][31:sa]);
 * } else {
 *   sa = Rs2[4:0];
 *   res[(31+sa):0] = Rs1.W[x] <<(logic) sa;
 *   if (res > (2^31)-1) {
 *     res[31:0] = 0x7fff_ffff; OV = 1;
 *   } else if (res < -2^31) {
 *     res[31:0] = 0x8000_0000; OV = 1;
 *   }
 *   Rd.W[x] = res[31:0];
 * }
 * x=1...0
 * ~~~
 *
 * \param [in]  a  unsigned long long value passed as Rs1 (two packed 32-bit elements to shift)
 * \param [in]  b  int value passed as Rs2; bits [5:0] are read as a signed shift amount
 *                 (positive: left shift with saturation, negative: arithmetic right shift)
 * \return unsigned long long value read back from Rd (two packed 32-bit shifted results)
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKSLRA32(unsigned long long a, int b)
{
    unsigned long long result;
    __ASM volatile("dkslra32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
    return result;
}
/* ===== Inline Function End for DKSLRA32 ===== */

/* ===== Inline Function Start for DKADD32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief DKADD32 (64-bit SIMD 32-bit Signed Saturating Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKADD32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer element saturating additions simultaneously.
 *
 * **Description**:\n
 * This instruction adds the 32-bit signed integer elements in Rs1 with the 32-bit signed integer elements in Rs2. If any
 * of the results are beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), they are saturated to the range and the OV
 * bit is set to 1. The saturated results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rs1.W[x] + Rs2.W[x];
 * if (res[x] > 0x7fff_ffff) {
 *   res[x] = 0x7fff_ffff;
 *   OV = 1;
 * } else if (res[x] < 0x8000_0000) {
 *   res[x] = 0x8000_0000;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * x=1...0
 * ~~~
 *
 * \param [in]  a  unsigned long long value passed as Rs1 (two packed signed 32-bit addends)
 * \param [in]  b  unsigned long long value passed as Rs2 (two packed signed 32-bit addends)
 * \return unsigned long long value read back from Rd (two packed saturated 32-bit sums)
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKADD32(unsigned long long a, unsigned long long b)
{
    unsigned long long result;
    __ASM volatile("dkadd32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
    return result;
}
/* ===== Inline Function End for DKADD32 ===== */

/* ===== Inline Function Start for DKSUB32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief   DKSUB32 (64-bit SIMD 32-bit Signed Saturating Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKSUB32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer element saturating subtractions simultaneously.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit signed integer elements in Rs2 from the 32-bit signed integer elements in Rs1. If
 * any of the results are beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), they are saturated to the range and the
 * OV bit is set to 1. The saturated results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[x] = Rs1.W[x] - Rs2.W[x];
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[x] = res[x];
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKSUB32(unsigned long long a, unsigned long long b)
{
    /* Value produced by the instruction through output operand %0. */
    unsigned long long rd;

    /* dksub32: %1 <- a, %2 <- b, both passed in general registers. */
    __ASM volatile("dksub32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for DKSUB32 ===== */

/* ===== Inline Function Start for DRADD16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief   DRADD16 (64-bit SIMD 16-bit Halving Signed Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DRADD16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element additions simultaneously. The results are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the 16-bit signed integer elements in Rs1 with the 16-bit signed integer elements in Rs2. The results
 * are first arithmetically right-shifted by 1 bit and then written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = [(Rs1.H[x]) + (Rs2.H[x])] s>> 1;
 * x=3...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DRADD16(unsigned long long a, unsigned long long b)
{
    /* Value produced by the instruction through output operand %0. */
    unsigned long long rd;

    /* dradd16: %1 <- a, %2 <- b, both passed in general registers. */
    __ASM volatile("dradd16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for DRADD16 ===== */

/* ===== Inline Function Start for DSUB16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief   DSUB16 (64-bit SIMD 16-bit Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSUB16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit integer element subtractions simultaneously.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit signed integer elements in Rs2 from the 16-bit signed integer elements in Rs1. The
 * results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = [(Rs1.H[x]) - (Rs2.H[x])] ;
 * x=3...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DSUB16(unsigned long long a, unsigned long long b)
{
    /* Value produced by the instruction through output operand %0. */
    unsigned long long rd;

    /* dsub16: %1 <- a, %2 <- b, both passed in general registers. */
    __ASM volatile("dsub16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for DSUB16 ===== */

/* ===== Inline Function Start for DRADD32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DRADD32 (64-bit SIMD 32-bit Halving Signed Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DRADD32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer element additions simultaneously. The results are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the 32-bit signed integer elements in Rs1 with the 32-bit signed integer elements in Rs2. The results
 * are first arithmetically right-shifted by 1 bit and then written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = [(Rs1.W[x]) + (Rs2.W[x])] s>> 1;
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DRADD32(unsigned long long a, unsigned long long b)
{
    /* Value produced by the instruction through output operand %0. */
    unsigned long long rd;

    /* dradd32: %1 <- a, %2 <- b, both passed in general registers. */
    __ASM volatile("dradd32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for DRADD32 ===== */

/* ===== Inline Function Start for DSUB32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DSUB32 (64-bit SIMD 32-bit Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSUB32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit integer element subtractions simultaneously.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit signed integer elements in Rs2 from the 32-bit signed integer elements in Rs1. The
 * results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = [(Rs1.W[x]) - (Rs2.W[x])];
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DSUB32(unsigned long long a, unsigned long long b)
{
    /* Value produced by the instruction through output operand %0. */
    unsigned long long rd;

    /* dsub32: %1 <- a, %2 <- b, both passed in general registers. */
    __ASM volatile("dsub32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for DSUB32 ===== */

/* ===== Inline Function Start for DMSR16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DMSR16 (Signed Multiply Halfs with Right Shift 16-bit and Cross Multiply Halfs with Right Shift 16-bit)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DMSR16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications and two signed 16-bit cross multiplications from the 16-bit elements of two registers; each multiplication is followed by a right shift operation.
 *
 * **Description**:\n
 * For the `DMSR16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the top 16-bit Q15 content
 * of 32-bit chunks in Rs2, multiply the bottom 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content
 * of 32-bit chunks in Rs2.
 * At the same time, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content of 32-bit
 * chunks in Rs2 and multiply the bottom 16-bit Q15 content of 32-bit chunks in Rs1 with the top 16-bit Q15 content of 32-bit
 * chunks in Rs2. The Q31 results are then right-shifted 16-bits and clipped to Q15 values. The Q15 results are then written
 * into Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[0] = (Rs1.H[0] s* Rs2.H[0]) s>> 16
 * Rd.H[1] = (Rs1.H[1] s* Rs2.H[1]) s>> 16
 * Rd.H[2] = (Rs1.H[1] s* Rs2.H[0]) s>> 16
 * Rd.H[3] = (Rs1.H[0] s* Rs2.H[1]) s>> 16
 * ~~~
 *
 * \param [in]  a unsigned long type of value stored in a
 * \param [in]  b unsigned long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DMSR16(unsigned long a, unsigned long b)
{
    /* Value produced by the instruction through output operand %0 (wider than the inputs). */
    unsigned long long rd;

    /* dmsr16: %1 <- a, %2 <- b, both passed in general registers. */
    __ASM volatile("dmsr16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for DMSR16 ===== */

/* ===== Inline Function Start for DMSR17 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DMSR17 (Signed Multiply Halfs with Right Shift 17-bit and Cross Multiply Halfs with Right Shift 17-bit)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DMSR17 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications and two signed 16-bit cross multiplications from the 16-bit elements of two registers;
 * each multiplication is followed by a right shift operation.
 *
 * **Description**:\n
 * For the `DMSR17` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the top 16-bit Q15 content
 * of 32-bit chunks in Rs2, multiply the bottom 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content
 * of 32-bit chunks in Rs2.
 * At the same time, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content of 32-bit
 * chunks in Rs2 and multiply the bottom 16-bit Q15 content of 32-bit chunks in Rs1 with the top 16-bit Q15 content of 32-bit
 * chunks in Rs2. The Q31 results are then right-shifted 17-bits and clipped to Q15 values. The Q15 results are then written
 * into Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[0] = (Rs1.H[0] s* Rs2.H[0]) s>> 17
 * Rd.H[1] = (Rs1.H[1] s* Rs2.H[1]) s>> 17
 * Rd.H[2] = (Rs1.H[1] s* Rs2.H[0]) s>> 17
 * Rd.H[3] = (Rs1.H[0] s* Rs2.H[1]) s>> 17
 * ~~~
 *
 * \param [in]  a unsigned long type of value stored in a
 * \param [in]  b unsigned long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DMSR17(unsigned long a, unsigned long b)
{
    /* Value produced by the instruction through output operand %0 (wider than the inputs). */
    unsigned long long rd;

    /* dmsr17: %1 <- a, %2 <- b, both passed in general registers. */
    __ASM volatile("dmsr17 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for DMSR17 ===== */

/* ===== Inline Function Start for DMSR33 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DMSR33 (Signed Multiply with Right Shift 33-bit)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DMSR33 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 32-bit multiplications from the 32-bit elements of two registers, and each multiplications performs a right
 * shift operation.
 *
 * **Description**:\n
 * For the `DMSR33` instruction, multiply the top 32-bit Q31 content of 64-bit chunks in Rs1 with the top 32-bit Q31 content
 * of 64-bit chunks in Rs2. At the same time, multiply the bottom 32-bit Q31 content of 64-bit chunks in Rs1 with the bottom
 * 32-bit Q31 content of 64-bit chunks in Rs2.
 * The Q64 results are then right-shifted 33-bits and clipped to Q31 values. The Q31 results are then written into Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[0] = (Rs1.W[0] s* Rs2.W[0]) s>> 33
 * Rd.W[1] = (Rs1.W[1] s* Rs2.W[1]) s>> 33
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DMSR33(unsigned long long a, unsigned long long b)
{
    /* Value produced by the instruction through output operand %0. */
    unsigned long long rd;

    /* dmsr33: %1 <- a, %2 <- b, both passed in general registers. */
    __ASM volatile("dmsr33 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for DMSR33 ===== */

/* ===== Inline Function Start for DMXSR33 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DMXSR33 (Signed Crossed Multiply with Right Shift 33-bit)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DMXSR33 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 32-bit cross multiplications from the 32-bit elements of two registers, and each multiplications performs a
 * right shift operation.
 *
 * **Description**:\n
 * For the `DMXSR33` instruction, multiply the top 32-bit Q31 content of 64-bit chunks in Rs1 with the bottom 32-bit Q31
 * content of 64-bit chunks in Rs2. At the same time, multiply the bottom 32-bit Q31 content of 64-bit chunks in Rs1 with
 * the top 32-bit Q31 content of 64-bit chunks in Rs2.
 * The Q63 results are then right-shifted 33-bits and clipped to Q31 values. The Q31 results are then written into Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[0] = (Rs1.W[0] s* Rs2.W[1]) s>> 33
 * Rd.W[1] = (Rs1.W[1] s* Rs2.W[0]) s>> 33
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DMXSR33(unsigned long long a, unsigned long long b)
{
    /* Value produced by the instruction through output operand %0. */
    unsigned long long rd;

    /* dmxsr33: %1 <- a, %2 <- b, both passed in general registers. */
    __ASM volatile("dmxsr33 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for DMXSR33 ===== */

/* ===== Inline Function Start for DREDAS16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DREDAS16 (Reduced Addition and Reduced Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DREDAS16 Rd, Rs1
 * # Rs1 is an even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do halfs reduced subtraction and halfs reduced addition from a register. The result is written to Rd.
 *
 * **Description**:\n
 * For the `DREDAS16` instruction, subtract the top 16-bit Q15 element from the bottom 16-bit Q15 element of the bottom
 * 32-bit Q31 content of 64-bit chunks in Rs1. At the same time, add the top 16-bit Q15 element to the bottom 16-bit
 * Q15 element of the top 32-bit Q31 content of 64-bit chunks in Rs1. The two Q15 results are then written into Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[0] = Rs1.H[0] - Rs1.H[1]
 * Rd.H[1] = Rs1.H[2] + Rs1.H[3]
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_DREDAS16(unsigned long long a)
{
    /* Value produced by the instruction through output operand %0 (narrower than the input). */
    unsigned long rd;

    /* dredas16 has a single source operand: %1 <- a, passed in general registers. */
    __ASM volatile("dredas16 %0, %1"
                   : "=r"(rd)
                   : "r"(a));

    return rd;
}
/* ===== Inline Function End for DREDAS16 ===== */

/* ===== Inline Function Start for DREDSA16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DREDSA16 (Reduced Subtraction and Reduced Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DREDSA16 Rd, Rs1
 * # Rs1 is an even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do halfs reduced subtraction and halfs reduced addition from a register. The result is written to Rd.
 *
 * **Description**:\n
 * For the `DREDSA16` instruction, add the top 16-bit Q15 element to the bottom 16-bit Q15 element of the bottom
 * 32-bit Q31 content of 64-bit chunks in Rs1. At the same time, subtract the top 16-bit Q15 element from the bottom 16-bit
 * Q15 element of the top 32-bit Q31 content of 64-bit chunks in Rs1. The two Q15 results are then written into Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[0] = Rs1.H[0] + Rs1.H[1]
 * Rd.H[1] = Rs1.H[2] - Rs1.H[3]
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \return value stored in unsigned long type
 */
__STATIC_FORCEINLINE unsigned long __RV_DREDSA16(unsigned long long a)
{
    /* Value produced by the instruction through output operand %0 (narrower than the input). */
    unsigned long rd;

    /* dredsa16 has a single source operand: %1 <- a, passed in general registers. */
    __ASM volatile("dredsa16 %0, %1"
                   : "=r"(rd)
                   : "r"(a));

    return rd;
}
/* ===== Inline Function End for DREDSA16 ===== */

/* ===== Inline Function Start for DKCLIP64 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DKCLIP64 (64-bit Clipped to 16-bit Saturation Value)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKCLIP64 Rd, Rs1
 * # Rs1 is an even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Arithmetically right-shift the 64-bit input by 15 bits, convert the result to a 32-bit int, then saturate it to the
 * 16-bit Q15 range.
 *
 * **Description**:\n
 * For the `DKCLIP64` instruction, shift the input 15 bits to the right and convert the result to 32-bit int type, after
 * which the value is saturated to limit the data to between 2^15-1 and -2^15. The result is converted to 16-bit Q15 type. The
 * final results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * const int32_t max = (int32_t)((1U << 15U) - 1U);
 * const int32_t min = -1 - max ;
 * int32_t val = (int32_t)(Rs1 s>> 15);
 * if (val > max) {
 *   Rd = max;
 * } else if (val < min) {
 *   Rd = min;
 * } else {
 *   Rd = (int16_t)val;
 * }
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \return value stored in int16_t type
 */
__STATIC_FORCEINLINE int16_t __RV_DKCLIP64(unsigned long long a)
{
    /* 16-bit value produced by the instruction through output operand %0. */
    int16_t rd;

    /* dkclip64 has a single source operand: %1 <- a, passed in general registers. */
    __ASM volatile("dkclip64 %0, %1"
                   : "=r"(rd)
                   : "r"(a));

    return rd;
}
/* ===== Inline Function End for DKCLIP64 ===== */

/* ===== Inline Function Start for DKMDA ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DKMDA (Signed Multiply Two Halfs and Add)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKMDA Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then adds the two 32-bit results together.
 * The addition result may be saturated.
 *
 * **Description**:\n
 * This instruction multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of the
 * 32-bit elements of Rs2 and then adds the result to the result of multiplying the top 16-bit content of the 32-bit elements of
 * Rs1 with the top 16-bit content of the 32-bit elements of Rs2.
 * The addition result is checked for saturation. If saturation happens, the result is saturated to 2^31-1. The final results are
 * written to Rd. The 16-bit contents are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * if (Rs1.W[x] != 0x80008000) or (Rs2.W[x] != 0x80008000){
 *   Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[1]) + (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
 * } else {
 *   Rd.W[x] = 0x7fffffff;
 *   OV = 1;
 * }
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKMDA(unsigned long long a, unsigned long long b)
{
    /* Value produced by the instruction through output operand %0. */
    unsigned long long rd;

    /* dkmda: %1 <- a, %2 <- b, both passed in general registers. */
    __ASM volatile("dkmda %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for DKMDA ===== */

/* ===== Inline Function Start for DKMXDA ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief   DKMXDA (Signed Crossed Multiply Two Halfs and Add)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKMXDA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then adds the two 32-bit results together.
 * The addition result may be saturated.
 * * DKMXDA: top*bottom + bottom*top (per 32-bit element)
 *
 * **Description**:\n
 * This instruction multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit
 * elements of Rs2 and then adds the result to the result of multiplying the top 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2.
 * The addition result is checked for saturation. If saturation happens, the result is saturated to 2^31-1. The final results are
 * written to Rd. The 16-bit contents are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * if (Rs1.W[x] != 0x80008000) or (Rs2.W[x] != 0x80008000){
 *   Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) + (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
 * } else {
 *   Rd.W[x] = 0x7fffffff;
 *   OV = 1;
 * }
 * x=1...0
 * ~~~
 *
 * \param [in]  a    unsigned long long type of value stored in a
 * \param [in]  b    unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKMXDA(unsigned long long a, unsigned long long b)
{
    /* Value produced by the instruction through output operand %0. */
    unsigned long long rd;

    /* dkmxda: %1 <- a, %2 <- b, both passed in general registers. */
    __ASM volatile("dkmxda %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for DKMXDA ===== */

/* ===== Inline Function Start for DSMDRS ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief   DSMDRS (Signed Multiply Two Halfs and Reverse Subtract)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSMDRS Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then perform a subtraction operation
 * between the two 32-bit results.
 * * DSMDRS: bottom*bottom - top*top (per 32-bit element)
 *
 * **Description**:\n
 * This instruction multiplies the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit
 * elements of Rs2 and then subtracts the result from the result of multiplying the bottom 16-bit content of the 32-bit elements
 * of Rs1 with the bottom 16-bit content of the 32-bit elements of Rs2.
 * The subtraction result is written to the corresponding 32-bit element of Rd (The 16-bit contents of multiplication are
 * treated as signed integers).
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]); x = 1...0
 * ~~~
 *
 * \param [in]  a    unsigned long long type of value stored in a
 * \param [in]  b    unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DSMDRS(unsigned long long a, unsigned long long b)
{
    /* Value produced by the instruction through output operand %0. */
    unsigned long long rd;

    /* dsmdrs: %1 <- a, %2 <- b, both passed in general registers. */
    __ASM volatile("dsmdrs %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for DSMDRS ===== */

/* ===== Inline Function Start for DSMXDS ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief   DSMXDS (Signed Crossed Multiply Two Halfs and Subtract)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSMXDS Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then perform a subtraction operation
 * between the two 32-bit results.
 * * DSMXDS: top*bottom - bottom*top (per 32-bit element)
 *
 * **Description**:\n
 * This instruction multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit
 * elements of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of the 32-bit elements
 * of Rs1 with the bottom 16-bit content of the 32-bit elements of Rs2.
 * The subtraction result is written to the corresponding 32-bit element of Rd. The 16-bit contents of multiplication are
 * treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]); x = 1...0
 * ~~~
 *
 * \param [in]  a    unsigned long long type of value stored in a
 * \param [in]  b    unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DSMXDS(unsigned long long a, unsigned long long b)
{
    /* Value produced by the instruction through output operand %0. */
    unsigned long long rd;

    /* dsmxds: %1 <- a, %2 <- b, both passed in general registers. */
    __ASM volatile("dsmxds %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for DSMXDS ===== */

/* ===== Inline Function Start for DSMBB32 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief   DSMBB32 (Signed Multiply Bottom Word & Bottom Word)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSMBB32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register and write the 64-bit result to a third register.
 * * DSMBB32: bottom*bottom
 *
 * **Description**:\n
 * This instruction multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit element of Rs2. The 64-bit multiplication result is written to Rd.
 * The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * res = (Rs1.W[0] * Rs2.W[0]);
 * Rd = res;
 * ~~~
 *
 * \param [in]  a    unsigned long long type of value stored in a
 * \param [in]  b    unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMBB32(unsigned long long a, unsigned long long b)
{
    /* Signed 64-bit value produced by the instruction through output operand %0. */
    long long rd;

    /* dsmbb32: %1 <- a, %2 <- b, both passed in general registers. */
    __ASM volatile("dsmbb32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for DSMBB32 ===== */

/* ===== Inline Function Start for DSMBB32.sra14 ===== */
/**
 * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief   DSMBB32.sra14 (Signed Multiply Bottom Word & Bottom Word with Right Shift 14)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSMBB32.sra14 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register, then right shift
 * 14-bit, finally write the 64-bit result to a third register.
 * * DSMBB32.sra14: bottom*bottom s>> 14
 *
 * **Description**:\n
 * This instruction multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit element of Rs2. The 64-bit multiplication result is written to Rd after right shift 14-bit.
 * The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * res = (Rs1.W[0] * Rs2.W[0]) s>> 14;
 * Rd = res;
 * ~~~
 *
 * \param [in]  a    unsigned long long type of value stored in a
 * \param [in]  b    unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMBB32_SRA14(unsigned long long a, unsigned long long b)
{
    /* Signed 64-bit value produced by the instruction through output operand %0. */
    long long rd;

    /* dsmbb32.sra14: %1 <- a, %2 <- b, both passed in general registers. */
    __ASM volatile("dsmbb32.sra14 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for DSMBB32.sra14 ===== */

/* ===== Inline Function Start for DSMBB32.sra32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief   DSMBB32.sra32 (Signed Multiply Bottom Word & Bottom Word with Right Shift 32)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSMBB32.sra32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register, then right shift
 * 32-bit, finally write the 64-bit result to a third register.
 * * DSMBB32.sra32: bottom*bottom s>> 32
 *
 * **Description**:\n
 * This instruction multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit element of Rs2.
 * The 64-bit multiplication result is written to Rd after right shift 32-bit. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * res = (Rs1.W[0] * Rs2.W[0]) s>> 32;
 * Rd = res;
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMBB32_SRA32(unsigned long long a, unsigned long long b)
{
    /* Signed 64-bit value produced by the instruction through output operand %0. */
    long long rd;

    /* dsmbb32.sra32: %1 <- a, %2 <- b, both passed in general registers. */
    __ASM volatile("dsmbb32.sra32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for DSMBB32.sra32 ===== */

/* ===== Inline Function Start for DSMBT32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DSMBT32 (Signed Multiply Bottom Word & Top Word)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSMBT32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register and write the 64-bit
 * result to a third register.
 * * DSMBT32: bottom*top
 *
 * **Description**:\n
 * This instruction multiplies the bottom 32-bit element of Rs1 with the top 32-bit element of Rs2. The 64-bit multiplication
 * result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * res = (Rs1.W[0] * Rs2.W[1]);
 * Rd = res;
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMBT32(unsigned long long a, unsigned long long b)
{
    /* Signed 64-bit value produced by the instruction through output operand %0. */
    long long rd;

    /* dsmbt32: %1 <- a, %2 <- b, both passed in general registers. */
    __ASM volatile("dsmbt32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for DSMBT32 ===== */

/* ===== Inline Function Start for DSMBT32.sra14 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DSMBT32.sra14 (Signed Multiply Bottom Word & Top Word with Right Shift 14)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSMBT32.sra14 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register, then right shift
 * 14-bit, finally write the 64-bit result to a third register.
 * * DSMBT32.sra14: bottom*top s>> 14
 *
 * **Description**:\n
 * This instruction multiplies the bottom 32-bit element of Rs1 with the top 32-bit element of Rs2. The 64-bit multiplication
 * result is written to Rd after right shift 14-bit. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * res = (Rs1.W[0] * Rs2.W[1]) s>> 14;
 * Rd = res;
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMBT32_SRA14(unsigned long long a, unsigned long long b)
{
    /* Signed 64-bit value produced by the instruction through output operand %0. */
    long long rd;

    /* dsmbt32.sra14: %1 <- a, %2 <- b, both passed in general registers. */
    __ASM volatile("dsmbt32.sra14 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for DSMBT32.sra14 ===== */

/* ===== Inline Function Start for DSMBT32.sra32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DSMBT32.sra32 (Signed Multiply Bottom Word & Top Word with Right Shift 32)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSMBT32.sra32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register, then right shift
 * 32-bit, finally write the 64-bit result to a third register.
 * * DSMBT32.sra32: bottom*top s>> 32
 *
 * **Description**:\n
 * This instruction multiplies the bottom 32-bit element of Rs1 with the top 32-bit element of Rs2. The 64-bit multiplication
 * result is written to Rd after right shift 32-bit. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * res = (Rs1.W[0] * Rs2.W[1]) s>> 32;
 * Rd = res;
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMBT32_SRA32(unsigned long long a, unsigned long long b)
{
    /* Signed 64-bit value produced by the instruction through output operand %0. */
    long long rd;

    /* dsmbt32.sra32: %1 <- a, %2 <- b, both passed in general registers. */
    __ASM volatile("dsmbt32.sra32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for DSMBT32.sra32 ===== */

/* ===== Inline Function Start for DSMTT32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DSMTT32 (Signed Multiply Top Word & Top Word)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSMTT32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register and write the 64-bit
 * result to a third register.
 * * DSMTT32: top*top
 *
 * **Description**:\n
 * This instruction multiplies the top 32-bit element of Rs1 with the top 32-bit element of Rs2. The 64-bit multiplication
 * result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * res = Rs1.W[1] * Rs2.W[1];
 * Rd = res;
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMTT32(unsigned long long a, unsigned long long b)
{
    long long rd;

    /* Operands: %0 = rd (output), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("dsmtt32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DSMTT32 ===== */

/* ===== Inline Function Start for DSMTT32.sra14 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DSMTT32.sra14 (Signed Multiply Top Word & Top Word with Right Shift 14-bit)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSMTT32.sra14 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register, then right shift 14-bit,
 * finally write the 64-bit result to a third register.
 * * DSMTT32.sra14: top*top s>> 14
 *
 * **Description**:\n
 * This instruction multiplies the top 32-bit element of Rs1 with the top 32-bit element of Rs2. The 64-bit multiplication
 * result is written to Rd after right shift 14-bit. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * res = (Rs1.W[1] * Rs2.W[1]) s>> 14;
 * Rd = res;
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMTT32_SRA14(unsigned long long a, unsigned long long b)
{
    long long rd;

    /* Operands: %0 = rd (output), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("dsmtt32.sra14 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DSMTT32.sra14 ===== */

/* ===== Inline Function Start for DSMTT32.sra32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DSMTT32.sra32 (Signed Multiply Top Word & Top Word with Right Shift 32-bit)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSMTT32.sra32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register, then right shift 32-bit,
 * finally write the 64-bit result to a third register.
 * * DSMTT32.sra32: top*top s>> 32
 *
 * **Description**:\n
 * This instruction multiplies the top 32-bit element of Rs1 with the top 32-bit element of Rs2. The 64-bit multiplication
 * result is written to Rd after right shift 32-bit. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * res = (Rs1.W[1] * Rs2.W[1]) s>> 32;
 * Rd = res;
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMTT32_SRA32(unsigned long long a, unsigned long long b)
{
    long long rd;

    /* Operands: %0 = rd (output), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("dsmtt32.sra32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DSMTT32.sra32 ===== */

/* ===== Inline Function Start for DPKBB32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DPKBB32 (Pack Two 32-bit Data from Both Bottom Half)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DPKBB32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Pack 32-bit data from 64-bit chunks in two registers.
 * * DPKBB32: bottom.bottom
 *
 * **Description**:\n
 * This instruction moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
 *
 * **Operations**:\n
 * ~~~
 * Rd = CONCAT(Rs1.W[0], Rs2.W[0]);
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DPKBB32(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;

    /* Operands: %0 = rd (output), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("dpkbb32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DPKBB32 ===== */

/* ===== Inline Function Start for DPKBT32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DPKBT32 (Pack Two 32-bit Data from Bottom and Top Half)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DPKBT32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Pack 32-bit data from 64-bit chunks in two registers.
 * * DPKBT32: bottom.top
 *
 * **Description**:\n
 * This instruction moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
 *
 * **Operations**:\n
 * ~~~
 * Rd = CONCAT(Rs1.W[0], Rs2.W[1]);
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DPKBT32(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;

    /* Operands: %0 = rd (output), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("dpkbt32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DPKBT32 ===== */

/* ===== Inline Function Start for DPKTT32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DPKTT32 (Pack Two 32-bit Data from Both Top Half)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DPKTT32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Pack 32-bit data from 64-bit chunks in two registers.
 * * DPKTT32: top.top
 *
 * **Description**:\n
 * This instruction moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
 *
 * **Operations**:\n
 * ~~~
 * Rd = CONCAT(Rs1.W[1], Rs2.W[1]);
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DPKTT32(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;

    /* Operands: %0 = rd (output), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("dpktt32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DPKTT32 ===== */

/* ===== Inline Function Start for DPKTB32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DPKTB32 (Pack Two 32-bit Data from Top and Bottom Half)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DPKTB32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Pack 32-bit data from 64-bit chunks in two registers.
 * * DPKTB32: top.bottom
 *
 * **Description**:\n
 * This instruction moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
 *
 * **Operations**:\n
 * ~~~
 * Rd = CONCAT(Rs1.W[1], Rs2.W[0]);
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DPKTB32(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;

    /* Operands: %0 = rd (output), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("dpktb32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DPKTB32 ===== */

/* ===== Inline Function Start for DPKTB16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DPKTB16 (Pack Two 16-bit Data from Top and Bottom Half)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DPKTB16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Pack 16-bit data from 32-bit chunks in two registers.
 * * DPKTB16: top.bottom
 *
 * **Description**:\n
 * This instruction moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0].
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][15:0]);
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DPKTB16(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;

    /* Operands: %0 = rd (output), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("dpktb16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DPKTB16 ===== */

/* ===== Inline Function Start for DPKBB16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DPKBB16 (Pack Two 16-bit Data from Both Bottom Half)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DPKBB16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Pack 16-bit data from 32-bit chunks in two registers.
 * * DPKBB16: bottom.bottom
 *
 * **Description**:\n
 * This instruction moves Rs1.W[x][15:0] to Rd.W[x][31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0].
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][15:0]);
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DPKBB16(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;

    /* Operands: %0 = rd (output), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("dpkbb16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DPKBB16 ===== */

/* ===== Inline Function Start for DPKBT16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DPKBT16 (Pack Two 16-bit Data from Bottom and Top Half)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DPKBT16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Pack 16-bit data from 32-bit chunks in two registers.
 * * DPKBT16: bottom.top
 *
 * **Description**:\n
 * This instruction moves Rs1.W[x] [15:0] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][31:16]);
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DPKBT16(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;

    /* Operands: %0 = rd (output), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("dpkbt16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DPKBT16 ===== */

/* ===== Inline Function Start for DPKTT16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DPKTT16 (Pack Two 16-bit Data from Both Top Half)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DPKTT16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Pack 16-bit data from 32-bit chunks in two registers.
 * * DPKTT16: top.top
 *
 * **Description**:\n
 * This instruction moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][31:16]);
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DPKTT16(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;

    /* Operands: %0 = rd (output), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("dpktt16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DPKTT16 ===== */

/* ===== Inline Function Start for DSRA16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DSRA16 (SIMD 16-bit Shift Right Arithmetic)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSRA16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit element arithmetic right shift operations simultaneously. The shift amount is a variable from a GPR.
 *
 * **Description**:\n
 * The 16-bit data elements in Rs1 are right-shifted arithmetically, that is, the vacated high-order bits are filled with the
 * sign-bit of the data elements. The shift amount is specified by the low-order 4-bits of the value in the Rs2 register. And
 * the results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * sa = Rs2[3:0];
 * if (sa != 0)
 * {
 * Rd.H[x] = SE16(Rs1.H[x][15:sa]);
 * } else {
 * Rd = Rs1;
 * }
 * x=3...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DSRA16(unsigned long long a, unsigned long b)
{
    unsigned long long rd;

    /* Operands: %0 = rd (output), %1 = a (Rs1, data), %2 = b (Rs2, shift amount). */
    __ASM volatile("dsra16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DSRA16 ===== */

/* ===== Inline Function Start for DADD16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DADD16 (16-bit Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DADD16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit integer element additions simultaneously.
 *
 * **Description**:\n
 * This instruction adds the 16-bit unsigned integer elements in Rs1 with the 16-bit unsigned integer elements in Rs2. And
 * the results are written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = Rs1.H[x] + Rs2.H[x];
 * x=3...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DADD16(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;

    /* Operands: %0 = rd (output), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("dadd16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DADD16 ===== */

/* ===== Inline Function Start for DADD32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DADD32 (32-bit Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DADD32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit integer element additions simultaneously.
 *
 * **Description**:\n
 * This instruction adds the 32-bit integer elements in Rs1 with the 32-bit integer elements in Rs2, and then writes the 32-bit
 * element results to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = Rs1.W[x] + Rs2.W[x];
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DADD32(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;

    /* Operands: %0 = rd (output), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("dadd32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DADD32 ===== */

/* ===== Inline Function Start for DSMBB16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DSMBB16 (Signed Multiply Bottom Half & Bottom Half)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSMBB16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 16-bit content of the 32-bit elements of a register with the signed 16-bit content of the 32-bit elements
 * of another register and write the result to a third register.
 * * DSMBB16: W[x].bottom*W[x].bottom
 *
 * **Description**:\n
 * For the `DSMBB16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom
 * 16-bit content of the 32-bit elements of Rs2.
 * The multiplication results are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[0];
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DSMBB16(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;

    /* Operands: %0 = rd (output), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("dsmbb16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DSMBB16 ===== */

/* ===== Inline Function Start for DSMBT16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DSMBT16 (Signed Multiply Bottom Half & Top Half)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSMBT16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 16-bit content of the 32-bit elements of a register with the signed 16-bit content of the 32-bit
 * elements of another register and write the result to a third register.
 * * DSMBT16: W[x].bottom *W[x].top
 *
 * **Description**:\n
 * For the `DSMBT16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with the top 16-bit
 * content of the 32-bit elements of Rs2.
 * The multiplication results are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[1];
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DSMBT16(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;

    /* Operands: %0 = rd (output), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("dsmbt16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DSMBT16 ===== */

/* ===== Inline Function Start for DSMTT16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DSMTT16 (Signed Multiply Top Half & Top Half)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSMTT16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 16-bit content of the 32-bit elements of a register with the signed 16-bit content of the 32-bit
 * elements of another register and write the result to a third register.
 * * DSMTT16: W[x].top * W[x].top
 *
 * **Description**:\n
 * For the `DSMTT16` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit
 * content of the 32-bit elements of Rs2.
 * The multiplication results are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = Rs1.W[x].H[1] * Rs2.W[x].H[1];
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DSMTT16(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;

    /* Operands: %0 = rd (output), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("dsmtt16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DSMTT16 ===== */

/* ===== Inline Function Start for DRCRSA16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DRCRSA16 (16-bit Signed Halving Cross Subtraction & Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DRCRSA16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element subtraction and 16-bit signed integer element addition in a 32-bit chunk simultaneously.
 * Operands are from crossed positions in 32-bit chunks. The results are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit signed integer in [15:0] of 32-bit chunks in Rs2 from the 16-bit signed integer in
 * [31:16] of 32-bit chunks in Rs1, and adds the 16-bit signed integer in [31:16] of 32-bit chunks in Rs2 to the 16-bit signed
 * integer in [15:0] of 32-bit chunks in Rs1. The element results are first arithmetically right-shifted by 1 bit and then
 * written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of 32-bit chunks in Rd for addition.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:16] = (Rs1.W[x][31:16] - Rs2.W[x][15:0]) s>> 1;
 * Rd.W[x][15:0] = (Rs1.W[x][15:0] + Rs2.W[x][31:16]) s>> 1;
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DRCRSA16(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;

    /* Operands: %0 = rd (output), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("drcrsa16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DRCRSA16 ===== */

/* ===== Inline Function Start for DRCRSA32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DRCRSA32 (32-bit Signed Halving Cross Subtraction & Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DRCRSA32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer element subtraction and 32-bit signed integer element addition in a 64-bit chunk simultaneously.
 * Operands are from crossed 32-bit elements. The results are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit signed integer element in [31:0] of Rs2 from the 32-bit signed integer element in
 * [63:32] of Rs1, and adds the 32-bit signed integer element in [63:32] of Rs2 to the 32-bit signed integer element in [31:0]
 * of Rs1. The element results are first arithmetically right-shifted by 1 bit and then written to [63:32] of Rd for subtraction
 * and [31:0] of Rd for addition.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[1] = (Rs1.W[1] - Rs2.W[0]) s>> 1;
 * Rd.W[0] = (Rs1.W[0] + Rs2.W[1]) s>> 1;
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DRCRSA32(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;

    /* Operands: %0 = rd (output), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("drcrsa32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DRCRSA32 ===== */

/* ===== Inline Function Start for DRCRAS16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DRCRAS16 (16-bit Signed Halving Cross Addition & Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DRCRAS16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element subtraction and 16-bit signed integer element addition in a 32-bit chunk simultaneously.
 * Operands are from crossed positions in 32-bit chunks. The results are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the 16-bit signed integer in [31:16] of 32-bit chunks in Rs1 with the 16-bit signed integer in
 * [15:0] of 32-bit chunks in Rs2, and subtracts the 16-bit signed integer in [31:16] of 32-bit chunks in Rs2 from the 16-bit
 * signed integer in [15:0] of 32-bit chunks in Rs1. The element results are first arithmetically right-shifted by 1 bit and then
 * written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit chunks in Rd for subtraction.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x][31:16] = (Rs1.W[x][31:16] + Rs2.W[x][15:0]) s>> 1;
 * Rd.W[x][15:0] = (Rs1.W[x][15:0] - Rs2.W[x][31:16]) s>> 1;
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DRCRAS16(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;

    /* Operands: %0 = rd (output), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("drcras16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DRCRAS16 ===== */

/* ===== Inline Function Start for DRCRAS32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DRCRAS32 (32-bit Signed Halving Cross Addition & Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DRCRAS32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer element addition and 32-bit signed integer element subtraction in a 64-bit chunk simultaneously.
 * Operands are from crossed 32-bit elements. The results are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction adds the 32-bit signed integer element in [63:32] of Rs1 with the 32-bit signed integer element in [31:0]
 * of Rs2, and subtracts the 32-bit signed integer element in [63:32] of Rs2 from the 32-bit signed integer element in [31:0]
 * of Rs1. The element results are first arithmetically right-shifted by 1 bit and then written to [63:32] of Rd for addition
 * and [31:0] of Rd for subtraction.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[1] = (Rs1.W[1] + Rs2.W[0]) s>> 1;
 * Rd.W[0] = (Rs1.W[0] - Rs2.W[1]) s>> 1;
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DRCRAS32(unsigned long long a, unsigned long long b)
{
    unsigned long long result;
    /*
     * The mnemonic is spelled in lowercase, matching the sibling D* wrappers
     * (drcras16, drcrsa32, ...). The previous uppercase spelling only
     * assembled if the assembler matched mnemonics case-insensitively.
     * Operands: %0 = result (Rd), %1 = a (Rs1), %2 = b (Rs2).
     */
    __ASM volatile("drcras32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
    return result;
}
/* ===== Inline Function End for DRCRAS32 ===== */

/* ===== Inline Function Start for DKCRAS16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DKCRAS16 (16-bit Signed Saturating Cross Addition & Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKCRAS16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element saturating addition and 16-bit signed integer element saturating subtraction in a 32-bit
 * chunk simultaneously. Operands are from crossed positions in 32-bit chunks.
 *
 * **Description**:\n
 * This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1 with the 16-bit signed integer
 * element in [15:0] of 32-bit chunks in Rs2; at the same time, it subtracts the 16-bit signed integer element in [31:16] of
 * 32-bit chunks in Rs2 from the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs1.
 * If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV
 * bit is set to 1. The saturated results are written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit chunks
 * in Rd for subtraction.
 *
 * **Operations**:\n
 * ~~~
 * res1 = Rs1.W[x][31:16] + Rs2.W[x][15:0];
 * res2 = Rs1.W[x][15:0] - Rs2.W[x][31:16];
 * for (res in [res1, res2]) {
 *   if (res > (2^15)-1) {
 *     res = (2^15)-1;
 *     OV = 1;
 *   } else if (res < -2^15) {
 *     res = -2^15;
 *     OV = 1;
 *   }
 * }
 * Rd.W[x][31:16] = res1;
 * Rd.W[x][15:0] = res2;
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKCRAS16(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;

    /* Operands: %0 = rd (output), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("dkcras16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DKCRAS16 ===== */

/* ===== Inline Function Start for DKCRSA16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DKCRSA16 (16-bit Signed Saturating Cross Subtraction & Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKCRSA16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element saturating subtraction and 16-bit signed integer element saturating addition in a 32-bit
 * chunk simultaneously. Operands are from crossed positions in 32-bit chunks.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs2 from the 16-bit signed integer
 * element in [31:16] of 32-bit chunks in Rs1; at the same time, it adds the 16-bit signed integer element in [31:16] of 32-bit
 * chunks in Rs2 with the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs1.
 * If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV
 * bit is set to 1. The saturated results are written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of 32-bit chunks
 * in Rd for addition.
 *
 * **Operations**:\n
 * ~~~
 * res1 = Rs1.W[x][31:16] - Rs2.W[x][15:0];
 * res2 = Rs1.W[x][15:0] + Rs2.W[x][31:16];
 * for (res in [res1, res2]) {
 *   if (res > (2^15)-1) {
 *     res = (2^15)-1;
 *     OV = 1;
 *   } else if (res < -2^15) {
 *     res = -2^15;
 *     OV = 1;
 *   }
 * }
 * Rd.W[x][31:16] = res1;
 * Rd.W[x][15:0] = res2;
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKCRSA16(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;

    /* Operands: %0 = rd (output), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("dkcrsa16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DKCRSA16 ===== */

/* ===== Inline Function Start for DRSUB16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DRSUB16 (16-bit Signed Halving Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DRSUB16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element subtractions simultaneously. The results are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit signed integer elements in Rs2 from the 16-bit signed integer elements in Rs1. The
 * results are first arithmetically right-shifted by 1 bit and then written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.H[x] = (Rs1.H[x] - Rs2.H[x]) s>> 1;
 * x=3...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DRSUB16(unsigned long long a, unsigned long long b)
{
    unsigned long long rd;

    /* Operands: %0 = rd (output), %1 = a (Rs1), %2 = b (Rs2). */
    __ASM volatile("drsub16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));
    return rd;
}
/* ===== Inline Function End for DRSUB16 ===== */

/* ===== Inline Function Start for DSTSA32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DSTSA32 (32-bit Straight Subtraction & Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSTSA32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit integer element subtraction and 32-bit integer element addition in a 64-bit chunk simultaneously. Operands are
 * from corresponding 32-bit elements.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit integer element in [63:32] of Rs2 from the 32-bit integer element in [63:32] of Rs1,
 * and writes the result to [63:32] of Rd; at the same time, it adds the 32-bit integer element in [31:0] of Rs1 with the 32-bit
 * integer element in [31:0] of Rs2, and writes the result to [31:0] of Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[1] = Rs1.W[1] - Rs2.W[1];
 * Rd.W[0] = Rs1.W[0] + Rs2.W[0];
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DSTSA32(unsigned long long a, unsigned long long b)
{
    /* Written by the instruction through the "=r" output operand. */
    unsigned long long rd;

    __ASM volatile("dstsa32 %0, %1, %2"
                   : "=r"(rd)            /* %0: destination */
                   : "r"(a), "r"(b));    /* %1, %2: sources */

    return rd;
}
/* ===== Inline Function End for DSTSA32 ===== */

/* ===== Inline Function Start for DSTAS32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DSTAS32 (SIMD 32-bit Straight Addition & Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSTAS32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit integer element addition and 32-bit integer element subtraction in a 64-bit chunk simultaneously. Operands are
 * from corresponding 32-bit elements.
 *
 * **Description**:\n
 * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit integer element in [63:32] of Rs2,
 * and writes the result to [63:32] of Rd; at the same time, it subtracts the 32-bit integer element in [31:0] of Rs2
 * from the 32-bit integer element in [31:0] of Rs1, and writes the result to [31:0] of Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[1] = Rs1.W[1] + Rs2.W[1];
 * Rd.W[0] = Rs1.W[0] - Rs2.W[0];
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DSTAS32(unsigned long long a, unsigned long long b)
{
    /* Receives the value the instruction writes to its output operand. */
    unsigned long long result;

    /*
     * The mnemonic is spelled in lower case like every other intrinsic in this
     * file, so it does not depend on the assembler accepting upper-case
     * instruction names.
     */
    __ASM volatile("dstas32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
    return result;
}
/* ===== Inline Function End for DSTAS32 ===== */

/* ===== Inline Function Start for DKCRSA32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DKCRSA32 (32-bit Signed Saturating Cross Subtraction & Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKCRSA32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer element saturating subtraction and 32-bit signed integer element saturating addition in a 64-bit
 * chunk simultaneously. Operands are from crossed 32-bit elements.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit integer element in [31:0] of Rs2 from the 32-bit integer element in [63:32] of Rs1; at
 * the same time, it adds the 32-bit integer element in [31:0] of Rs1 with the 32-bit integer element in [63:32] of Rs2. If any
 * of the results are beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), they are saturated to the range and the OV bit is
 * set to 1. The saturated results are written to [63:32] of Rd for subtraction and [31:0] of Rd for addition.
 *
 * **Operations**:\n
 * ~~~
 * res[1] = Rs1.W[1] - Rs2.W[0];
 * res[0] = Rs1.W[0] + Rs2.W[1];
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[1] = res[1];
 * Rd.W[0] = res[0];
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKCRSA32(unsigned long long a, unsigned long long b)
{
    /* Output operand of the dkcrsa32 instruction. */
    unsigned long long rd;

    /* a and b are register inputs; nothing else is listed as read or clobbered. */
    __ASM volatile("dkcrsa32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for DKCRSA32 ===== */

/* ===== Inline Function Start for DKCRAS32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DKCRAS32 (32-bit Signed Saturating Cross Addition & Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKCRAS32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer element saturating addition and 32-bit signed integer element saturating subtraction in a 64-bit
 * chunk simultaneously. Operands are from crossed 32-bit elements.
 *
 * **Description**:\n
 * This instruction adds the 32-bit integer element in [31:0] of Rs2 with the 32-bit integer element in [63:32] of Rs1; at the
 * same time, it subtracts the 32-bit integer element in [63:32] of Rs2 from the 32-bit integer element in [31:0] of Rs1. If any
 * of the results are beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), they are saturated to the range and the OV bit is
 * set to 1. The saturated results are written to [63:32] of Rd for addition and [31:0] of Rd for subtraction.
 *
 * **Operations**:\n
 * ~~~
 * res[1] = Rs1.W[1] + Rs2.W[0];
 * res[0] = Rs1.W[0] - Rs2.W[1];
 * if (res[x] > (2^31)-1) {
 *   res[x] = (2^31)-1;
 *   OV = 1;
 * } else if (res[x] < -2^31) {
 *   res[x] = -2^31;
 *   OV = 1;
 * }
 * Rd.W[1] = res[1];
 * Rd.W[0] = res[0];
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKCRAS32(unsigned long long a, unsigned long long b)
{
    /* Written by the instruction through the "=r" output operand. */
    unsigned long long rd;

    __ASM volatile("dkcras32 %0, %1, %2"
                   : "=r"(rd)            /* %0: destination */
                   : "r"(a), "r"(b));    /* %1, %2: sources */

    return rd;
}
/* ===== Inline Function End for DKCRAS32 ===== */

/* ===== Inline Function Start for DCRSA32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DCRSA32 (32-bit Cross Subtraction & Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DCRSA32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit integer element subtraction and 32-bit integer element addition in a 64-bit chunk simultaneously. Operands are
 * from crossed 32-bit elements.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit integer element in [31:0] of Rs2 from the 32-bit integer element in [63:32] of Rs1, and
 * writes the result to [63:32] of Rd; at the same time, it adds the 32-bit integer element in [31:0] of Rs1 with the 32-bit
 * integer element in [63:32] of Rs2, and writes the result to [31:0] of Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[1] = Rs1.W[1] - Rs2.W[0];
 * res[0] = Rs1.W[0] + Rs2.W[1];
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DCRSA32(unsigned long long a, unsigned long long b)
{
    /* Receives the value the instruction writes to its output operand. */
    unsigned long long rd;

    /* Sources go in as plain register operands; the output is returned as-is. */
    __ASM volatile("dcrsa32 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for DCRSA32 ===== */

/* ===== Inline Function Start for DCRAS32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DCRAS32 (32-bit Cross Addition & Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DCRAS32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit integer element addition and 32-bit integer element subtraction in a 64-bit chunk simultaneously. Operands are
 * from crossed 32-bit elements.
 *
 * **Description**:\n
 * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit integer element in [31:0] of Rs2, and
 * writes the result to [63:32] of Rd; at the same time, it subtracts the 32-bit integer element in [63:32] of Rs2 from the 32-bit
 * integer element in [31:0] of Rs1, and writes the result to [31:0] of Rd.
 *
 * **Operations**:\n
 * ~~~
 * res[1] = Rs1.W[1] + Rs2.W[0];
 * res[0] = Rs1.W[0] - Rs2.W[1];
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DCRAS32(unsigned long long a, unsigned long long b)
{
    /* Output operand of the dcras32 instruction. */
    unsigned long long rd;

    __ASM volatile("dcras32 %0, %1, %2"
                   : "=r"(rd)            /* %0: destination */
                   : "r"(a), "r"(b));    /* %1, %2: sources */

    return rd;
}
/* ===== Inline Function End for DCRAS32 ===== */

/* ===== Inline Function Start for DKSTSA16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DKSTSA16 (16-bit Signed Saturating Straight Subtraction & Addition)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKSTSA16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element saturating subtraction and 16-bit signed integer element saturating addition in a 32-bit
 * chunk simultaneously. Operands are from corresponding positions in 32-bit chunks.
 *
 * **Description**:\n
 * This instruction subtracts the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs2 from the 16-bit signed integer
 * element in [31:16] of 32-bit chunks in Rs1; at the same time, it adds the 16-bit signed integer element in [15:0] of 32-bit
 * chunks in Rs2 with the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs1.
 * If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV
 * bit is set to 1. The saturated results are written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of 32-bit chunks
 * in Rd for addition.
 *
 * **Operations**:\n
 * ~~~
 * res1 = Rs1.W[x][31:16] - Rs2.W[x][31:16];
 * res2 = Rs1.W[x][15:0] + Rs2.W[x][15:0];
 * for (res in [res1, res2]) {
 *   if (res > (2^15)-1) {
 *     res = (2^15)-1;
 *     OV = 1;
 *   } else if (res < -2^15) {
 *     res = -2^15;
 *     OV = 1;
 *   }
 * }
 * Rd.W[x][31:16] = res1;
 * Rd.W[x][15:0] = res2;
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKSTSA16(unsigned long long a, unsigned long long b)
{
    /* Written by the instruction through the "=r" output operand. */
    unsigned long long rd;

    /* a and b are supplied as register inputs (%1, %2); %0 is write-only. */
    __ASM volatile("dkstsa16 %0, %1, %2"
                   : "=r"(rd)
                   : "r"(a), "r"(b));

    return rd;
}
/* ===== Inline Function End for DKSTSA16 ===== */

/* ===== Inline Function Start for DKSTAS16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DKSTAS16 (16-bit Signed Saturating Straight Addition & Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKSTAS16 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 16-bit signed integer element saturating addition and 16-bit signed integer element saturating subtraction in a 32-bit
 * chunk simultaneously. Operands are from corresponding positions in 32-bit chunks.
 *
 * **Description**:\n
 * This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1 with the 16-bit signed integer
 * element in [31:16] of 32-bit chunks in Rs2; at the same time, it subtracts the 16-bit signed integer element in [15:0] of
 * 32-bit chunks in Rs2 from the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs1.
 * If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV
 * bit is set to 1. The saturated results are written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit chunks
 * in Rd for subtraction.
 *
 * **Operations**:\n
 * ~~~
 * res1 = Rs1.W[x][31:16] + Rs2.W[x][31:16];
 * res2 = Rs1.W[x][15:0] - Rs2.W[x][15:0];
 * for (res in [res1, res2]) {
 *   if (res > (2^15)-1) {
 *     res = (2^15)-1;
 *     OV = 1;
 *   } else if (res < -2^15) {
 *     res = -2^15;
 *     OV = 1;
 *   }
 * }
 * Rd.W[x][31:16] = res1;
 * Rd.W[x][15:0] = res2;
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKSTAS16(unsigned long long a, unsigned long long b)
{
    /* Receives the value the instruction writes to its output operand. */
    unsigned long long rd;

    __ASM volatile("dkstas16 %0, %1, %2"
                   : "=r"(rd)            /* %0: destination */
                   : "r"(a), "r"(b));    /* %1, %2: sources */

    return rd;
}
/* ===== Inline Function End for DKSTAS16 ===== */

/* ===== Inline Function Start for DSCLIP8 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DSCLIP8 (8-bit Signed Saturation and Clip)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSCLIP8 Rd, Rs1, imm3u[2:0]
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Limit the 8-bit signed integer elements of a register into a signed range simultaneously.
 *
 * **Description**:\n
 * This instruction limits the 8-bit signed integer elements stored in Rs1 into a signed integer range between -2^imm3u and
 * 2^imm3u-1, and writes the limited results to Rd. For example, if imm3u is 3, the 8-bit input values should be saturated
 * between 7 and -8. If saturation is performed, set OV bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * src = Rs1.B[x];
 * if (src > (2^imm3u)-1) {
 *   src = (2^imm3u)-1;
 *   OV = 1;
 * } else if (src < -2^imm3u) {
 *   src = -2^imm3u;
 *   OV = 1;
 * }
 * Rd.B[x] = src
 * x=7...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned immediate (imm3u) selecting the clip range; must be a compile-time constant
 * \return value stored in unsigned long long type
 */
/*
 * Implemented as a macro because `b` is bound with the immediate-only "K"
 * constraint and therefore has to be a constant expression at the call site.
 * `a` is captured into __a BEFORE the output temporary is declared, so an
 * argument expression that mentions an identifier called `result` still refers
 * to the caller's variable rather than to the uninitialised temporary.
 */
#define __RV_DSCLIP8(a, b)    \
    ({    \
        unsigned long long __a = (unsigned long long)(a);    \
        unsigned long long result;    \
        __ASM volatile("dsclip8 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b));    \
        result;    \
    })
/* ===== Inline Function End for DSCLIP8 ===== */

/* ===== Inline Function Start for DSCLIP16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DSCLIP16 (16-bit Signed Saturation and Clip)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSCLIP16 Rd, Rs1, imm4u[3:0]
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Limit the 16-bit signed integer elements of a register into a signed range simultaneously.
 *
 * **Description**:\n
 * This instruction limits the 16-bit signed integer elements stored in Rs1 into a signed integer range between -2^imm4u and
 * 2^imm4u-1, and writes the limited results to Rd. For example, if imm4u is 3, the 16-bit input values should be saturated
 * between 7 and -8. If saturation is performed, set OV bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * src = Rs1.H[x];
 * if (src > (2^imm4u)-1) {
 *   src = (2^imm4u)-1;
 *   OV = 1;
 * } else if (src < -2^imm4u) {
 *   src = -2^imm4u;
 *   OV = 1;
 * }
 * Rd.H[x] = src
 * x=3...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned immediate (imm4u) selecting the clip range; must be a compile-time constant
 * \return value stored in unsigned long long type
 */
/*
 * Implemented as a macro because `b` is bound with the immediate-only "K"
 * constraint and therefore has to be a constant expression at the call site.
 * `a` is captured into __a BEFORE the output temporary is declared, so an
 * argument expression that mentions an identifier called `result` still refers
 * to the caller's variable rather than to the uninitialised temporary.
 */
#define __RV_DSCLIP16(a, b)    \
    ({    \
        unsigned long long __a = (unsigned long long)(a);    \
        unsigned long long result;    \
        __ASM volatile("dsclip16 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b));    \
        result;    \
    })
/* ===== Inline Function End for DSCLIP16 ===== */

/* ===== Inline Function Start for DSCLIP32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DSCLIP32 (32-bit Signed Saturation and Clip)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSCLIP32 Rd, Rs1, imm5u[4:0]
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Limit the 32-bit signed integer elements of a register into a signed range simultaneously.
 *
 * **Description**:\n
 * This instruction limits the 32-bit signed integer elements stored in Rs1 into a signed integer range between -2^imm5u and
 * 2^imm5u-1, and writes the limited results to Rd. For example, if imm5u is 3, the 32-bit input values should be saturated
 * between 7 and -8. If saturation is performed, set OV bit to 1.
 *
 * **Operations**:\n
 * ~~~
 * src = Rs1.W[x];
 * if (src > (2^imm5u)-1) {
 *   src = (2^imm5u)-1;
 *   OV = 1;
 * } else if (src < -2^imm5u) {
 *   src = -2^imm5u;
 *   OV = 1;
 * }
 * Rd.W[x] = src
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned immediate (imm5u) selecting the clip range; must be a compile-time constant
 * \return value stored in unsigned long long type
 */
/*
 * Implemented as a macro because `b` is bound with the immediate-only "K"
 * constraint and therefore has to be a constant expression at the call site.
 * `a` is captured into __a BEFORE the output temporary is declared, so an
 * argument expression that mentions an identifier called `result` still refers
 * to the caller's variable rather than to the uninitialised temporary.
 */
#define __RV_DSCLIP32(a, b)    \
    ({    \
        unsigned long long __a = (unsigned long long)(a);    \
        unsigned long long result;    \
        __ASM volatile("dsclip32 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b));    \
        result;    \
    })
/* ===== Inline Function End for DSCLIP32 ===== */

/* ===== Inline Function Start for DRSUB32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DRSUB32 (32-bit Signed Halving Subtraction)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DRSUB32 Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do 32-bit signed integer element subtractions simultaneously. The results are halved to avoid overflow or saturation.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit signed integer elements in Rs2 from the 32-bit signed integer elements in Rs1. The
 * results are first arithmetically right-shifted by 1 bit and then written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[x] = (Rs1.W[x] - Rs2.W[x]) s>> 1;
 * x=1...0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DRSUB32(unsigned long long a, unsigned long long b)
{
    /* Output operand of the drsub32 instruction. */
    unsigned long long diff;

    /* Both sources are passed in registers; the result is returned unchanged. */
    __ASM volatile("drsub32 %0, %1, %2"
                   : "=r"(diff)
                   : "r"(a), "r"(b));

    return diff;
}
/* ===== Inline Function End for DRSUB32 ===== */

/* ===== Inline Function Start for DPACK32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DPACK32 (SIMD Pack Two 32-bit Data To 64-bit)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DPACK32 Rd, Rs1, Rs2
 * # Rd is even/odd pair of register
 * ~~~
 *
 * **Purpose**:\n
 * Pack two 32-bit values taken from two registers into one 64-bit value.
 *
 * **Description**:\n
 * This instruction moves 32-bit Rs1 to Rd.W[1] and moves 32-bit Rs2 to Rd.W[0].
 *
 * **Operations**:\n
 * ~~~
 * Rd = CONCAT(Rs1.W , Rs2.W);
 * ~~~
 *
 * \param [in]  a signed long type of value stored in a
 * \param [in]  b signed long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DPACK32(signed long a, signed long b)
{
    /* 64-bit output operand; the two inputs are `signed long` register operands. */
    unsigned long long packed;

    __ASM volatile("dpack32 %0, %1, %2"
                   : "=r"(packed)
                   : "r"(a), "r"(b));

    return packed;
}
/* ===== Inline Function End for DPACK32 ===== */

/* ===== Inline Function Start for DSUNPKD810 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DSUNPKD810 (Signed Unpacking Bytes 1 & 0)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSUNPKD810 Rd, Rs1
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte 1 and byte 0 of 32-bit chunks in a register into two 16-bit signed halfwords of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `DSUNPKD810` instruction, it unpacks byte 1 and byte 0 of 32-bit chunks in Rs1 into two 16-bit signed halfwords
 * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = SE16(Rs1.W[m].B[1])
 * Rd.W[m].H[0] = SE16(Rs1.W[m].B[0])
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DSUNPKD810(unsigned long long a)
{
    /* Receives the value the instruction writes to its output operand. */
    unsigned long long unpacked;

    /* Single register source (%1); %0 is write-only. */
    __ASM volatile("dsunpkd810 %0, %1"
                   : "=r"(unpacked)
                   : "r"(a));

    return unpacked;
}
/* ===== Inline Function End for DSUNPKD810 ===== */

/* ===== Inline Function Start for DSUNPKD820 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DSUNPKD820 (Signed Unpacking Bytes 2 & 0)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSUNPKD820 Rd, Rs1
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte 2 and byte 0 of 32-bit chunks in a register into two 16-bit signed halfwords of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `DSUNPKD820` instruction, it unpacks byte 2 and byte 0 of 32-bit chunks in Rs1 into two 16-bit signed halfwords
 * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = SE16(Rs1.W[m].B[2])
 * Rd.W[m].H[0] = SE16(Rs1.W[m].B[0])
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DSUNPKD820(unsigned long long a)
{
    /* Output operand of the dsunpkd820 instruction. */
    unsigned long long unpacked;

    /* Single register source (%1); %0 is write-only. */
    __ASM volatile("dsunpkd820 %0, %1"
                   : "=r"(unpacked)
                   : "r"(a));

    return unpacked;
}
/* ===== Inline Function End for DSUNPKD820 ===== */

/* ===== Inline Function Start for DSUNPKD830 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DSUNPKD830 (Signed Unpacking Bytes 3 & 0)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSUNPKD830 Rd, Rs1
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte 3 and byte 0 of 32-bit chunks in a register into two 16-bit signed halfwords of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `DSUNPKD830` instruction, it unpacks byte 3 and byte 0 of 32-bit chunks in Rs1 into two 16-bit signed halfwords
 * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = SE16(Rs1.W[m].B[3])
 * Rd.W[m].H[0] = SE16(Rs1.W[m].B[0])
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DSUNPKD830(unsigned long long a)
{
    /* Written by the instruction through the "=r" output operand. */
    unsigned long long unpacked;

    /* Single register source (%1); %0 is write-only. */
    __ASM volatile("dsunpkd830 %0, %1"
                   : "=r"(unpacked)
                   : "r"(a));

    return unpacked;
}
/* ===== Inline Function End for DSUNPKD830 ===== */

/* ===== Inline Function Start for DSUNPKD831 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DSUNPKD831 (Signed Unpacking Bytes 3 & 1)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSUNPKD831 Rd, Rs1
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte 3 and byte 1 of 32-bit chunks in a register into two 16-bit signed halfwords of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `DSUNPKD831` instruction, it unpacks byte 3 and byte 1 of 32-bit chunks in Rs1 into two 16-bit signed halfwords
 * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = SE16(Rs1.W[m].B[3])
 * Rd.W[m].H[0] = SE16(Rs1.W[m].B[1])
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DSUNPKD831(unsigned long long a)
{
    /* Receives the value the instruction writes to its output operand. */
    unsigned long long unpacked;

    /* Single register source (%1); %0 is write-only. */
    __ASM volatile("dsunpkd831 %0, %1"
                   : "=r"(unpacked)
                   : "r"(a));

    return unpacked;
}
/* ===== Inline Function End for DSUNPKD831 ===== */

/* ===== Inline Function Start for DSUNPKD832 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DSUNPKD832 (Signed Unpacking Bytes 3 & 2)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSUNPKD832 Rd, Rs1
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte 3 and byte 2 of 32-bit chunks in a register into two 16-bit signed halfwords of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `DSUNPKD832` instruction, it unpacks byte 3 and byte 2 of 32-bit chunks in Rs1 into two 16-bit signed halfwords
 * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = SE16(Rs1.W[m].B[3])
 * Rd.W[m].H[0] = SE16(Rs1.W[m].B[2])
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DSUNPKD832(unsigned long long a)
{
    /* Output operand of the dsunpkd832 instruction. */
    unsigned long long unpacked;

    /* Single register source (%1); %0 is write-only. */
    __ASM volatile("dsunpkd832 %0, %1"
                   : "=r"(unpacked)
                   : "r"(a));

    return unpacked;
}
/* ===== Inline Function End for DSUNPKD832 ===== */

/* ===== Inline Function Start for DZUNPKD810 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DZUNPKD810 (UnSigned Unpacking Bytes 1 & 0)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DZUNPKD810 Rd, Rs1
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte 1 and byte 0 of 32-bit chunks in a register into two 16-bit unsigned halfwords of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `DZUNPKD810` instruction, it unpacks byte 1 and byte 0 of 32-bit chunks in Rs1 into two 16-bit unsigned halfwords
 * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[1])
 * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[0])
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DZUNPKD810(unsigned long long a)
{
    /* Receives the value the instruction writes to its output operand. */
    unsigned long long unpacked;

    /* Single register source (%1); %0 is write-only. */
    __ASM volatile("dzunpkd810 %0, %1"
                   : "=r"(unpacked)
                   : "r"(a));

    return unpacked;
}
/* ===== Inline Function End for DZUNPKD810 ===== */

/* ===== Inline Function Start for DZUNPKD820 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DZUNPKD820 (UnSigned Unpacking Bytes 2 & 0)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DZUNPKD820 Rd, Rs1
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte 2 and byte 0 of 32-bit chunks in a register into two 16-bit unsigned halfwords of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `DZUNPKD820` instruction, it unpacks byte 2 and byte 0 of 32-bit chunks in Rs1 into two 16-bit unsigned halfwords
 * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[2])
 * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[0])
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DZUNPKD820(unsigned long long a)
{
    /* Output operand of the dzunpkd820 instruction. */
    unsigned long long unpacked;

    /* Single register source (%1); %0 is write-only. */
    __ASM volatile("dzunpkd820 %0, %1"
                   : "=r"(unpacked)
                   : "r"(a));

    return unpacked;
}
/* ===== Inline Function End for DZUNPKD820 ===== */

/* ===== Inline Function Start for DZUNPKD830 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DZUNPKD830 (UnSigned Unpacking Bytes 3 & 0)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DZUNPKD830 Rd, Rs1
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte 3 and byte 0 of 32-bit chunks in a register into two 16-bit unsigned halfwords of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `DZUNPKD830` instruction, it unpacks byte 3 and byte 0 of 32-bit chunks in Rs1 into two 16-bit unsigned halfwords
 * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[3])
 * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[0])
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DZUNPKD830(unsigned long long a)
{
    /* Written by the instruction through the "=r" output operand. */
    unsigned long long unpacked;

    /* Single register source (%1); %0 is write-only. */
    __ASM volatile("dzunpkd830 %0, %1"
                   : "=r"(unpacked)
                   : "r"(a));

    return unpacked;
}
/* ===== Inline Function End for DZUNPKD830 ===== */

/* ===== Inline Function Start for DZUNPKD831 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DZUNPKD831 (UnSigned Unpacking Bytes 3 & 1)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DZUNPKD831 Rd, Rs1
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte 3 and byte 1 of 32-bit chunks in a register into two 16-bit unsigned halfwords of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `DZUNPKD831` instruction, it unpacks byte 3 and byte 1 of 32-bit chunks in Rs1 into two 16-bit unsigned halfwords
 * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[3])
 * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[1])
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DZUNPKD831(unsigned long long a)
{
    /* Receives the value the instruction writes to its output operand. */
    unsigned long long unpacked;

    /* Single register source (%1); %0 is write-only. */
    __ASM volatile("dzunpkd831 %0, %1"
                   : "=r"(unpacked)
                   : "r"(a));

    return unpacked;
}
/* ===== Inline Function End for DZUNPKD831 ===== */

/* ===== Inline Function Start for DZUNPKD832 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief    DZUNPKD832 (UnSigned Unpacking Bytes 3 & 2)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DZUNPKD832 Rd, Rs1
 * # Rd, Rs1 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Unpack byte 3 and byte 2 of 32-bit chunks in a register into two 16-bit unsigned halfwords of 32-bit chunks in a register.
 *
 * **Description**:\n
 * For the `DZUNPKD832` instruction, it unpacks byte 3 and byte 2 of 32-bit chunks in Rs1 into two 16-bit unsigned halfwords
 * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[3])
 * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[2])
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DZUNPKD832(unsigned long long a)
{
    /* Output operand of the dzunpkd832 instruction. */
    unsigned long long unpacked;

    /* Single register source (%1); %0 is write-only. */
    __ASM volatile("dzunpkd832 %0, %1"
                   : "=r"(unpacked)
                   : "r"(a));

    return unpacked;
}
/* ===== Inline Function End for DZUNPKD832 ===== */

/* ===== Inline Function Start for DKMMAC ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief   DKMMAC (64-bit MSW 32x32 Signed Multiply and Saturating Add)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKMMAC Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do MSW 32x32 element signed multiplications and saturating addition simultaneously. The results are written into Rd.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2
 * and adds the most significant 32-bit multiplication results with the signed 32-bit elements of Rd. If
 * the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range
 * and the OV bit is set to 1. The results after saturation are written to Rd. The .u form of the
 * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by
 * adding a 1 to bit 31 of the results.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
 * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
 *    res = sat.q31(dop + (aop s* bop)[63:32]);
 * }
 * Rd = concat(rest, resb);
 * x=0
 * ~~~
 *
 * \param [in]  t accumulator input (unsigned long long); bound to Rd, which the instruction reads and overwrites
 * \param [in]  a first source operand (unsigned long long), bound to Rs1
 * \param [in]  b second source operand (unsigned long long), bound to Rs2
 * \return updated accumulator value (unsigned long long)
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKMMAC(unsigned long long t, unsigned long long a, unsigned long long b)
{
    /* "+r": t is both the Rd input (accumulator) and the result written back. */
    __ASM volatile("dkmmac %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for DKMMAC ===== */

/* ===== Inline Function Start for DKMMAC.u ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief   DKMMAC.u (64-bit MSW 32x32 Signed Multiply with Rounding and Saturating Add)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKMMAC.u Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do MSW 32x32 element signed multiplications with rounding and saturating addition simultaneously.
 * The results are written into Rd.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2
 * and adds the most significant 32-bit multiplication results with the signed 32-bit elements of Rd. If
 * the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range
 * and the OV bit is set to 1. The results after saturation are written to Rd. The .u form of the
 * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by
 * adding a 1 to bit 31 of the results.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
 * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
 *   res = sat.q31(dop + RUND(aop s* bop)[63:32]);
 * }
 * Rd = concat(rest, resb);
 * x=0
 * ~~~
 *
 * \param [in]  t accumulator input (unsigned long long); bound to Rd, which the instruction reads and overwrites
 * \param [in]  a first source operand (unsigned long long), bound to Rs1
 * \param [in]  b second source operand (unsigned long long), bound to Rs2
 * \return updated accumulator value (unsigned long long)
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKMMAC_U(unsigned long long t, unsigned long long a, unsigned long long b)
{
    /* "+r": t is both the Rd input (accumulator) and the result written back. */
    __ASM volatile("dkmmac.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for DKMMAC.u ===== */

/* ===== Inline Function Start for DKMMSB ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief   DKMMSB (64-bit MSW 32x32 Signed Multiply and Saturating Sub)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKMMSB Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do MSW 32x32 element signed multiplications and saturating subtraction simultaneously. The results are written into Rd.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2
 * and subtracts the most significant 32-bit multiplication results from the signed 32-bit elements of
 * Rd. If the subtraction result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the
 * range and the OV bit is set to 1. The results after saturation are written to Rd. The .u form of the
 * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by
 * adding a 1 to bit 31 of the results.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
 * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
 *    res = sat.q31(dop - (aop s* bop)[63:32]);
 * }
 * Rd = concat(rest, resb);
 * x=0
 * ~~~
 *
 * \param [in]  t minuend input (unsigned long long); bound to Rd, which the instruction reads and overwrites
 * \param [in]  a first source operand (unsigned long long), bound to Rs1
 * \param [in]  b second source operand (unsigned long long), bound to Rs2
 * \return updated Rd value (unsigned long long)
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKMMSB(unsigned long long t, unsigned long long a, unsigned long long b)
{
    /* "+r": t is both the Rd input and the result written back. */
    __ASM volatile("dkmmsb %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for DKMMSB ===== */

/* ===== Inline Function Start for DKMMSB.u ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief   DKMMSB.u (64-bit MSW 32x32 Signed Multiply with Rounding and Saturating Sub)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKMMSB.u Rd, Rs1, Rs2
 * # Rd, Rs1, Rs2 are all even/odd pair of registers
 * ~~~
 *
 * **Purpose**:\n
 * Do MSW 32x32 element signed multiplications with rounding and saturating subtraction simultaneously.
 * The results are written into Rd.
 *
 * **Description**:\n
 * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2
 * and subtracts the most significant 32-bit multiplication results from the signed 32-bit elements of
 * Rd. If the subtraction result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the
 * range and the OV bit is set to 1. The results after saturation are written to Rd. The .u form of the
 * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by
 * adding a 1 to bit 31 of the results.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
 * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
 *    res = sat.q31(dop - RUND(aop s* bop)[63:32]);
 * }
 * Rd = concat(rest, resb);
 * x=0
 * ~~~
 *
 * \param [in]  t minuend input (unsigned long long); bound to Rd, which the instruction reads and overwrites
 * \param [in]  a first source operand (unsigned long long), bound to Rs1
 * \param [in]  b second source operand (unsigned long long), bound to Rs2
 * \return updated Rd value (unsigned long long)
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKMMSB_U(unsigned long long t, unsigned long long a, unsigned long long b)
{
    /* "+r": t is both the Rd input and the result written back. */
    __ASM volatile("dkmmsb.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for DKMMSB.u ===== */

/* ===== Inline Function Start for DKMADA ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DKMADA (Saturating Signed Multiply Two Halfs and Two Adds)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DKMADA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two 16x16 with 32-bit signed double addition simultaneously. The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
 * the bottom 16-bit content of 32-bit elements in Rs2 and then adds the result to the result of
 * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit
 * elements in Rs2. As shown in the operations below, the sum is added to the corresponding 32-bit
 * element of Rd and the result is saturated to the Q31 range.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
 *
 * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
 *   mul1 = aop.H[1] s* bop.H[1];
 *   mul2 = aop.H[0] s* bop.H[0];
 *   res = sat.q31(dop + mul1 + mul2);
 * }
 * Rd = concat(rest, resb);
 * x=0
 * ~~~
 *
 * \param [in]  t accumulator input (unsigned long long); bound to Rd, which the instruction reads and overwrites
 * \param [in]  a first source operand (unsigned long long), bound to Rs1
 * \param [in]  b second source operand (unsigned long long), bound to Rs2
 * \return updated accumulator value (unsigned long long)
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKMADA(unsigned long long t, unsigned long long a, unsigned long long b)
{
    /* "+r": t is both the Rd input (accumulator) and the result written back. */
    __ASM volatile("dkmada %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for DKMADA ===== */

/* ===== Inline Function Start for DKMAXDA ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DKMAXDA (Two Cross 16x16 with 32-bit Signed Double Add)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DKMAXDA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two cross 16x16 with 32-bit signed double addition simultaneously. The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the top 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-bit
 * elements in Rs2 and then adds the result to the result of multiplying the bottom 16-bit content of
 * 32-bit elements in Rs1 with the top 16-bit content of 32-bit elements in Rs2. As shown in the
 * operations below, the sum is added to the corresponding 32-bit element of Rd and the result is
 * saturated to the Q31 range.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
 *
 * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
 *   mul1 = aop.H[1] s* bop.H[0];
 *   mul2 = aop.H[0] s* bop.H[1];
 *   res = sat.q31(dop + mul1 + mul2);
 * }
 * Rd = concat(rest, resb);
 * x=0
 * ~~~
 *
 * \param [in]  t accumulator input (unsigned long long); bound to Rd, which the instruction reads and overwrites
 * \param [in]  a first source operand (unsigned long long), bound to Rs1
 * \param [in]  b second source operand (unsigned long long), bound to Rs2
 * \return updated accumulator value (unsigned long long)
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKMAXDA(unsigned long long t, unsigned long long a, unsigned long long b)
{
    /* "+r": t is both the Rd input (accumulator) and the result written back. */
    __ASM volatile("dkmaxda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for DKMAXDA ===== */

/* ===== Inline Function Start for DKMADS ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief  DKMADS (Two 16x16 with 32-bit Signed Add and Sub)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DKMADS Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two 16x16 with 32-bit signed addition and subtraction simultaneously. The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
 * the bottom 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
 * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit
 * elements in Rs2. As shown in the operations below, the difference is added to the corresponding
 * 32-bit element of Rd and the result is saturated to the Q31 range.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
 *
 * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
 *   mul1 = aop.H[1] s* bop.H[1];
 *   mul2 = aop.H[0] s* bop.H[0];
 *   res = sat.q31(dop + mul1 - mul2);
 * }
 * Rd = concat(rest, resb);
 * x=0
 * ~~~
 *
 * \param [in]  t accumulator input (unsigned long long); bound to Rd, which the instruction reads and overwrites
 * \param [in]  a first source operand (unsigned long long), bound to Rs1
 * \param [in]  b second source operand (unsigned long long), bound to Rs2
 * \return updated accumulator value (unsigned long long)
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKMADS(unsigned long long t, unsigned long long a, unsigned long long b)
{
    /* "+r": t is both the Rd input (accumulator) and the result written back. */
    __ASM volatile("dkmads %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for DKMADS ===== */

/* ===== Inline Function Start for DKMADRS ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief  DKMADRS (Two 16x16 with 32-bit Signed Add and Reversed Sub)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DKMADRS Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two 16x16 with 32-bit signed addition and reversed subtraction simultaneously. The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the top 16-bit content of 32-bit elements in Rs1 with the
 * top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
 * multiplying the bottom 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of
 * 32-bit elements in Rs2. As shown in the operations below, the difference is added to the
 * corresponding 32-bit element of Rd and the result is saturated to the Q31 range.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
 *
 * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
 *   mul1 = aop.H[1] s* bop.H[1];
 *   mul2 = aop.H[0] s* bop.H[0];
 *   res = sat.q31(dop - mul1 + mul2);
 * }
 * Rd = concat(rest, resb);
 * x=0
 * ~~~
 *
 * \param [in]  t accumulator input (unsigned long long); bound to Rd, which the instruction reads and overwrites
 * \param [in]  a first source operand (unsigned long long), bound to Rs1
 * \param [in]  b second source operand (unsigned long long), bound to Rs2
 * \return updated accumulator value (unsigned long long)
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKMADRS(unsigned long long t, unsigned long long a, unsigned long long b)
{
    /* "+r": t is both the Rd input (accumulator) and the result written back. */
    __ASM volatile("dkmadrs %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for DKMADRS ===== */

/* ===== Inline Function Start for DKMAXDS ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DKMAXDS (Saturating Signed Crossed Multiply Two Halfs & Subtract & Add)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DKMAXDS Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two cross 16x16 with 32-bit signed addition and subtraction simultaneously. The results are written into Rd.
 *
 * **Description**:\n
 * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then
 * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to
 * the corresponding 32-bit elements in a third register. The addition result may be saturated.
 * As shown in the operations below, the (bottom Rs1 x top Rs2) product is subtracted from the
 * (top Rs1 x bottom Rs2) product, and the saturation range is Q31.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
 *
 * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
 *   mul1 = aop.H[1] s* bop.H[0];
 *   mul2 = aop.H[0] s* bop.H[1];
 *   res = sat.q31(dop + mul1 - mul2);
 * }
 * Rd = concat(rest, resb);
 * x=0
 * ~~~
 *
 * \param [in]  t accumulator input (unsigned long long); bound to Rd, which the instruction reads and overwrites
 * \param [in]  a first source operand (unsigned long long), bound to Rs1
 * \param [in]  b second source operand (unsigned long long), bound to Rs2
 * \return updated accumulator value (unsigned long long)
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKMAXDS(unsigned long long t, unsigned long long a, unsigned long long b)
{
    /* "+r": t is both the Rd input (accumulator) and the result written back. */
    __ASM volatile("dkmaxds %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for DKMAXDS ===== */

/* ===== Inline Function Start for DKMSDA ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DKMSDA (Two 16x16 with 32-bit Signed Double Sub)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DKMSDA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two 16x16 with 32-bit signed double subtraction simultaneously. The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of
 * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2. As shown in
 * the operations below, both products are subtracted from the corresponding 32-bit element of Rd
 * and the result is saturated to the Q31 range.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
 *
 * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
 *   mul1 = aop.H[1] s* bop.H[1];
 *   mul2 = aop.H[0] s* bop.H[0];
 *   res = sat.q31(dop - mul1 - mul2);
 * }
 * Rd = concat(rest, resb);
 * x=0
 * ~~~
 *
 * \param [in]  t minuend input (unsigned long long); bound to Rd, which the instruction reads and overwrites
 * \param [in]  a first source operand (unsigned long long), bound to Rs1
 * \param [in]  b second source operand (unsigned long long), bound to Rs2
 * \return updated Rd value (unsigned long long)
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKMSDA(unsigned long long t, unsigned long long a, unsigned long long b)
{
    /* "+r": t is both the Rd input and the result written back. */
    __ASM volatile("dkmsda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for DKMSDA ===== */

/* ===== Inline Function Start for DKMSXDA ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DKMSXDA (Two Cross 16x16 with 32-bit Signed Double Sub)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DKMSXDA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two cross 16x16 with 32-bit signed double subtraction simultaneously. The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the bottom 16-bit content of the 32-bit elements of Rs1
 * with the top 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of the
 * 32-bit elements of Rs1 with the bottom 16-bit content of the 32-bit elements of Rs2. As shown in
 * the operations below, both products are subtracted from the corresponding 32-bit element of Rd
 * and the result is saturated to the Q31 range.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
 *
 * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
 *   mul1 = aop.H[1] s* bop.H[0];
 *   mul2 = aop.H[0] s* bop.H[1];
 *   res = sat.q31(dop - mul1 - mul2);
 * }
 * Rd = concat(rest, resb);
 * x=0
 * ~~~
 *
 * \param [in]  t minuend input (unsigned long long); bound to Rd, which the instruction reads and overwrites
 * \param [in]  a first source operand (unsigned long long), bound to Rs1
 * \param [in]  b second source operand (unsigned long long), bound to Rs2
 * \return updated Rd value (unsigned long long)
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKMSXDA(unsigned long long t, unsigned long long a, unsigned long long b)
{
    /* "+r": t is both the Rd input and the result written back. */
    __ASM volatile("dkmsxda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for DKMSXDA ===== */

/* ===== Inline Function Start for DSMAQA ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DSMAQA (Four Signed 8x8 with 32-bit Signed Add)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DSMAQA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do four signed 8x8 with 32-bit signed addition simultaneously. The results are written into Rd.
 *
 * **Description**:\n
 * This instruction multiplies the four signed 8-bit elements of 32-bit chunks of Rs1 with the four
 * signed 8-bit elements of 32-bit chunks of Rs2 and then adds the four results together with the signed
 * content of the corresponding 32-bit chunks of Rd. The final results are written back to the
 * corresponding 32-bit chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
 *
 * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
 *   m0 = aop.B[0] s* bop.B[0];
 *   m1 = aop.B[1] s* bop.B[1];
 *   m2 = aop.B[2] s* bop.B[2];
 *   m3 = aop.B[3] s* bop.B[3];
 *   res = dop + m0 + m1 + m2 + m3;
 * }
 * Rd = concat(rest, resb);
 * x=0
 * ~~~
 *
 * \param [in]  t accumulator input (unsigned long long); bound to Rd, which the instruction reads and overwrites
 * \param [in]  a first source operand (unsigned long long), bound to Rs1
 * \param [in]  b second source operand (unsigned long long), bound to Rs2
 * \return updated accumulator value (unsigned long long)
 */
__STATIC_FORCEINLINE unsigned long long __RV_DSMAQA(unsigned long long t, unsigned long long a, unsigned long long b)
{
    /* "+r": t is both the Rd input (accumulator) and the result written back. */
    __ASM volatile("dsmaqa %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for DSMAQA ===== */

/* ===== Inline Function Start for DSMAQA.SU ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DSMAQA.SU (Four Signed 8 x Unsigned 8 with 32-bit Signed Add)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DSMAQA.SU Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do four signed 8 x unsigned 8 with 32-bit signed addition simultaneously. The results are written into Rd.
 *
 * **Description**:\n
 * This instruction multiplies the four signed 8-bit elements of 32-bit chunks of Rs1 with the four
 * unsigned 8-bit elements of 32-bit chunks of Rs2 and then adds the four results together with the signed
 * content of the corresponding 32-bit chunks of Rd. The final results are written back to the
 * corresponding 32-bit chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
 *
 * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
 *   m0 = aop.B[0] su* bop.B[0];
 *   m1 = aop.B[1] su* bop.B[1];
 *   m2 = aop.B[2] su* bop.B[2];
 *   m3 = aop.B[3] su* bop.B[3];
 *   res = dop + m0 + m1 + m2 + m3;
 * }
 * Rd = concat(rest, resb);
 * x=0
 * ~~~
 *
 * \param [in]  t accumulator input (unsigned long long); bound to Rd, which the instruction reads and overwrites
 * \param [in]  a first source operand (unsigned long long), bound to Rs1 (signed 8-bit elements)
 * \param [in]  b second source operand (unsigned long long), bound to Rs2 (unsigned 8-bit elements)
 * \return updated accumulator value (unsigned long long)
 */
__STATIC_FORCEINLINE unsigned long long __RV_DSMAQA_SU(unsigned long long t, unsigned long long a, unsigned long long b)
{
    /* "+r": t is both the Rd input (accumulator) and the result written back. */
    __ASM volatile("dsmaqa.su %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for DSMAQA.SU ===== */

/* ===== Inline Function Start for DUMAQA ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DUMAQA (Four Unsigned 8x8 with 32-bit Unsigned Add)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DUMAQA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do four unsigned 8x8 with 32-bit unsigned addition simultaneously. The results are written into Rd.
 *
 * **Description**:\n
 * This instruction multiplies the four unsigned 8-bit elements of 32-bit chunks of Rs1 with the four
 * unsigned 8-bit elements of 32-bit chunks of Rs2 and then adds the four results together with the
 * unsigned content of the corresponding 32-bit chunks of Rd. The final results are written back to the
 * corresponding 32-bit chunks in Rd.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
 *
 * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
 *   m0 = aop.B[0] u* bop.B[0];
 *   m1 = aop.B[1] u* bop.B[1];
 *   m2 = aop.B[2] u* bop.B[2];
 *   m3 = aop.B[3] u* bop.B[3];
 *   res = dop + m0 + m1 + m2 + m3;
 * }
 * Rd = concat(rest, resb);
 * x=0
 * ~~~
 *
 * \param [in]  t accumulator input (unsigned long long); bound to Rd, which the instruction reads and overwrites
 * \param [in]  a first source operand (unsigned long long), bound to Rs1
 * \param [in]  b second source operand (unsigned long long), bound to Rs2
 * \return updated accumulator value (unsigned long long)
 */
__STATIC_FORCEINLINE unsigned long long __RV_DUMAQA(unsigned long long t, unsigned long long a, unsigned long long b)
{
    /* "+r": t is both the Rd input (accumulator) and the result written back. */
    __ASM volatile("dumaqa %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for DUMAQA ===== */

/* ===== Inline Function Start for DKMDA32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DKMDA32 (Two Signed 32x32 with 64-bit Saturation Add)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DKMDA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 32x32 multiplications and add the signed multiplication results with Q63 saturation.
 * The results are written into Rd.
 *
 * **Description**:\n
 * For the `DKMDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
 * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1
 * with the top 32-bit element of Rs2.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 * t0 = op1b s* op2b;
 * t1 = op1t s* op2t;
 * Rd = sat.q63(t0 + t1);
 * x=0
 * ~~~
 *
 * \param [in]  a first source operand (unsigned long long), bound to Rs1
 * \param [in]  b second source operand (unsigned long long), bound to Rs2
 * \return 64-bit result read back from Rd (long long)
 */
__STATIC_FORCEINLINE long long __RV_DKMDA32(unsigned long long a, unsigned long long b)
{
   long long result;
    /* "=r": Rd is write-only here; there is no accumulator input. */
    __ASM volatile("dkmda32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
    return result;
}
/* ===== Inline Function End for DKMDA32 ===== */

/* ===== Inline Function Start for DKMXDA32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DKMXDA32 (Two Cross Signed 32x32 with 64-bit Saturation Add)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DKMXDA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two cross signed 32x32 multiplications and add the signed multiplication results with Q63 saturation.
 * The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the bottom 32-bit element of Rs1 with the top 32-bit
 * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1
 * with the bottom 32-bit element of Rs2.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 * t01 = op1b s* op2t;
 * t10 = op1t s* op2b;
 * Rd = sat.q63(t01 + t10);
 * x=0
 * ~~~
 *
 * \param [in]  a first source operand (unsigned long long), bound to Rs1
 * \param [in]  b second source operand (unsigned long long), bound to Rs2
 * \return 64-bit result read back from Rd (long long)
 */
__STATIC_FORCEINLINE long long __RV_DKMXDA32(unsigned long long a, unsigned long long b)
{
    long long result;
    /* "=r": Rd is write-only here; there is no accumulator input. */
    __ASM volatile("dkmxda32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
    return result;
}
/* ===== Inline Function End for DKMXDA32 ===== */

/* ===== Inline Function Start for DKMADA32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DKMADA32 (Two Signed 32x32 with 64-bit Saturation Add)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DKMADA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 32x32 multiplications and add the signed multiplication results and a third register
 * with Q63 saturation. The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
 * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1
 * with the top 32-bit element of Rs2. The sum is then added to the 64-bit content of Rd with Q63
 * saturation.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 * t0 = op1b s* op2b;
 * t1 = op1t s* op2t;
 * Rd = sat.q63(Rd + t0 + t1);
 * x=0
 * ~~~
 *
 * \param [in]  t accumulator input (long long); bound to Rd, which the instruction reads and overwrites
 * \param [in]  a first source operand (unsigned long long), bound to Rs1
 * \param [in]  b second source operand (unsigned long long), bound to Rs2
 * \return updated accumulator value (long long)
 */
__STATIC_FORCEINLINE long long __RV_DKMADA32(long long t, unsigned long long a, unsigned long long b)
{
    /* "+r": t is both the Rd input (accumulator) and the result written back. */
    __ASM volatile("dkmada32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for DKMADA32 ===== */

/* ===== Inline Function Start for DKMAXDA32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DKMAXDA32 (Two Cross Signed 32x32 with 64-bit Saturation Add)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DKMAXDA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two cross signed 32x32 multiplications and add the signed multiplication results and a third register
 * with Q63 saturation. The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the top 32-bit element in Rs1 with the bottom 32-bit
 * element in Rs2 and then adds the result to the result of multiplying the bottom 32-bit element in Rs1
 * with the top 32-bit element in Rs2. The sum is then added to the 64-bit content of Rd with Q63
 * saturation.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 * t01 = op1b s* op2t;
 * t10 = op1t s* op2b;
 * Rd = sat.q63(Rd + t01 + t10);
 * x=0
 * ~~~
 *
 * \param [in]  t accumulator input (long long); bound to Rd, which the instruction reads and overwrites
 * \param [in]  a first source operand (unsigned long long), bound to Rs1
 * \param [in]  b second source operand (unsigned long long), bound to Rs2
 * \return updated accumulator value (long long)
 */
__STATIC_FORCEINLINE long long __RV_DKMAXDA32(long long t, unsigned long long a, unsigned long long b)
{
    /* "+r": t is both the Rd input (accumulator) and the result written back. */
    __ASM volatile("dkmaxda32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for DKMAXDA32 ===== */

/* ===== Inline Function Start for DKMADS32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DKMADS32 (Two Signed 32x32 with 64-bit Saturation Add and Sub)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DKMADS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 32x32 multiplications, add the top signed multiplication result, subtract the bottom signed
 * multiplication result, and add a third register with Q63 saturation. The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit
 * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in Rs1
 * with the top 32-bit element in Rs2. The difference is then added to the 64-bit content of Rd with Q63
 * saturation.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 *
 * t0 = op1b s* op2b;
 * t1 = op1t s* op2t;
 * Rd = sat.q63(Rd - t0 + t1);
 * x=0
 * ~~~
 *
 * \param [in]  t accumulator input (long long); bound to Rd, which the instruction reads and overwrites
 * \param [in]  a first source operand (unsigned long long), bound to Rs1
 * \param [in]  b second source operand (unsigned long long), bound to Rs2
 * \return updated accumulator value (long long)
 */
__STATIC_FORCEINLINE long long __RV_DKMADS32(long long t, unsigned long long a, unsigned long long b)
{
    /* "+r": t is both the Rd input (accumulator) and the result written back. */
    __ASM volatile("dkmads32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for DKMADS32 ===== */

/* ===== Inline Function Start for DKMADRS32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DKMADRS32 (Two Signed 32x32 with 64-bit Saturation Reversed Add and Sub)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DKMADRS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 32x32 multiplications, subtract the top signed multiplication result, add the bottom signed
 * multiplication result, and add a third register with Q63 saturation. The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the top 32-bit element in Rs1 with the top 32-bit
 * element in Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit
 * element in Rs1 with the bottom 32-bit element in Rs2. The difference is then added to the 64-bit
 * content of Rd with Q63 saturation.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 * t0 = op1b s* op2b;
 * t1 = op1t s* op2t;
 * Rd = sat.q63(Rd + t0 - t1);
 * x=0
 * ~~~
 *
 * \param [in]  t accumulator input (long long); bound to Rd, which the instruction reads and overwrites
 * \param [in]  a first source operand (unsigned long long), bound to Rs1
 * \param [in]  b second source operand (unsigned long long), bound to Rs2
 * \return updated accumulator value (long long)
 */
__STATIC_FORCEINLINE long long __RV_DKMADRS32(long long t, unsigned long long a, unsigned long long b)
{
    /* "+r": t is both the Rd input (accumulator) and the result written back. */
    __ASM volatile("dkmadrs32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
    return t;
}
/* ===== Inline Function End for DKMADRS32 ===== */

/* ===== Inline Function Start for DKMAXDS32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DKMAXDS32 (Two Cross Signed 32x32 with 64-bit Saturation Add and Sub)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DKMAXDS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 32x32 and add the top signed multiplication results and subtraction bottom signed multiplication results
 * and add a third register with Q63 saturation. The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the bottom 32-bit element in Rs1 with the top 32-bit
 * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in
 * Rs1 with the bottom 32-bit element in Rs2.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 *
 * t01 = op1b s* op2t;
 * t10 = op1t s* op2b;
 * Rd = sat.q63(Rd - t01 + t10);
 * x=0
 * ~~~
 *
 * \param [in]  t long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DKMAXDS32(long long t, unsigned long long a, unsigned long long b)
{
    /* Issue DKMAXDS32: t is both read and written, a and b are inputs only. */
    __ASM volatile(
        "dkmaxds32 %0, %1, %2"
        : "+r"(t)           /* %0: read-write operand */
        : "r"(a), "r"(b)    /* %1, %2: source operands */
    );
    return t;
}
/* ===== Inline Function End for DKMAXDS32 ===== */

/* ===== Inline Function Start for DKMSDA32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DKMSDA32 (Two Signed 32x32 with 64-bit Saturation Sub)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DKMSDA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 32x32 and subtraction the top signed multiplication results and subtraction bottom signed multiplication
 * results and add a third register with Q63 saturation. The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
 * element of Rs2 and multiplies the top 32-bit element of Rs1 with the top 32-bit element of Rs2.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 *
 * t0 = op1b s* op2b;
 * t1 = op1t s* op2t;
 * Rd = sat.q63(Rd - t0 - t1);
 * x=0
 * ~~~
 *
 * \param [in]  t long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DKMSDA32(long long t, unsigned long long a, unsigned long long b)
{
    /* Issue DKMSDA32: t is both read and written, a and b are inputs only. */
    __ASM volatile(
        "dkmsda32 %0, %1, %2"
        : "+r"(t)           /* %0: read-write operand */
        : "r"(a), "r"(b)    /* %1, %2: source operands */
    );
    return t;
}
/* ===== Inline Function End for DKMSDA32 ===== */

/* ===== Inline Function Start for DKMSXDA32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DKMSXDA32 (Two Cross Signed 32x32 with 64-bit Saturation Sub)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DKMSXDA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two cross signed 32x32 and subtraction the top signed multiplication results and subtraction bottom signed multiplication
 * results and add a third register with Q63 saturation. The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the bottom 32-bit element of Rs1 with the top 32-bit
 * element of Rs2 and multiplies the top 32-bit element of Rs1 with the bottom 32-bit element of Rs2.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 *
 * t0 = op1b s* op2t;
 * t1 = op1t s* op2b;
 * Rd = sat.q63(Rd - t0 - t1);
 * x=0
 * ~~~
 *
 * \param [in]  t long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DKMSXDA32(long long t, unsigned long long a, unsigned long long b)
{
    /* Issue DKMSXDA32: t is both read and written, a and b are inputs only. */
    __ASM volatile(
        "dkmsxda32 %0, %1, %2"
        : "+r"(t)           /* %0: read-write operand */
        : "r"(a), "r"(b)    /* %1, %2: source operands */
    );
    return t;
}
/* ===== Inline Function End for DKMSXDA32 ===== */

/* ===== Inline Function Start for DSMDS32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DSMDS32 (Two Signed 32x32 with 64-bit Sub)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DSMDS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 32x32 and add the top signed multiplication results and subtraction bottom signed multiplication. The
 * results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
 * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
 * Rs1 with the top 32-bit element of Rs2.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 *
 * t0 = op1b s* op2b;
 * t1 = op1t s* op2t;
 * Rd = t1 - t0;
 * x=0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMDS32(unsigned long long a, unsigned long long b)
{
    long long rd;   /* output-only operand %0, written by the instruction */

    __ASM volatile(
        "dsmds32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)    /* %1, %2: source operands */
    );
    return rd;
}
/* ===== Inline Function End for DSMDS32 ===== */

/* ===== Inline Function Start for DSMDRS32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DSMDRS32 (Two Signed 32x32 with 64-bit Reversed Sub)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DSMDRS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 32x32 and subtraction the top signed multiplication results and add bottom signed multiplication. The results are written into Rd
 *
 * **Description**:\n
 * It multiplies the top 32-bit element of Rs1 with the top 32-bit
 * element of Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit
 * element of Rs1 with the bottom 32-bit element of Rs2.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 *
 * t0 = op1b s* op2b;
 * t1 = op1t s* op2t;
 * Rd = t0 - t1;
 * x=0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMDRS32(unsigned long long a, unsigned long long b)
{
    long long rd;   /* output-only operand %0, written by the instruction */

    __ASM volatile(
        "dsmdrs32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)    /* %1, %2: source operands */
    );
    return rd;
}
/* ===== Inline Function End for DSMDRS32 ===== */

/* ===== Inline Function Start for DSMXDS32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DSMXDS32 (Two Cross Signed 32x32 with 64-bit Sub)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DSMXDS32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two cross signed 32x32 and add the top signed multiplication results and subtraction bottom signed multiplication.
 * The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the bottom 32-bit element of Rs1 with the top 32-bit
 * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
 * Rs1 with the bottom 32-bit element of Rs2.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 *
 * t01 = op1b s* op2t;
 * t10 = op1t s* op2b;
 * Rd = t10 - t01;
 * x=0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMXDS32(unsigned long long a, unsigned long long b)
{
    long long rd;   /* output-only operand %0, written by the instruction */

    __ASM volatile(
        "dsmxds32 %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)    /* %1, %2: source operands */
    );
    return rd;
}
/* ===== Inline Function End for DSMXDS32 ===== */

/* ===== Inline Function Start for DSMALDA ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DSMALDA (Four Signed 16x16 with 64-bit Add)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DSMALDA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do four signed 16x16 and add signed multiplication results and a third register. The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
 * content of Rs2 and then adds the result to the result of multiplying the top 16-bit content of Rs1 with
 * the top 16-bit content of Rs2 with unlimited precision
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 *
 * m0 = op1b.H[0] s* op2b.H[0];
 * m1 = op1b.H[1] s* op2b.H[1];
 * m2 = op1t.H[0] s* op2t.H[0];
 * m3 = op1t.H[1] s* op2t.H[1];
 *
 * Rd = Rd + m0 + m1 + m2 + m3;
 * x=0
 * ~~~
 *
 * \param [in]  t long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMALDA(long long t, unsigned long long a, unsigned long long b)
{
    /* Issue DSMALDA: t is both read and written, a and b are inputs only. */
    __ASM volatile(
        "dsmalda %0, %1, %2"
        : "+r"(t)           /* %0: read-write operand */
        : "r"(a), "r"(b)    /* %1, %2: source operands */
    );
    return t;
}
/* ===== Inline Function End for DSMALDA ===== */

/* ===== Inline Function Start for DSMALXDA ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DSMALXDA (Four Cross Signed 16x16 with 64-bit Add)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DSMALXDA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do four cross signed 16x16 and add signed multiplication results and a third register. The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the top 16-bit content of Rs1 with the bottom 16-bit
 * content of Rs2 and then adds the result to the result of multiplying the bottom 16-bit content of Rs1
 * with the top 16-bit content of Rs2 with unlimited precision.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 *
 * m0 = op1b.H[0] s* op2b.H[1];
 * m1 = op1b.H[1] s* op2b.H[0];
 * m2 = op1t.H[0] s* op2t.H[1];
 * m3 = op1t.H[1] s* op2t.H[0];
 *
 * Rd = Rd + m0 + m1 + m2 + m3;
 * x=0
 * ~~~
 *
 * \param [in]  t long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMALXDA(long long t, unsigned long long a, unsigned long long b)
{
    /* Issue DSMALXDA: t is both read and written, a and b are inputs only. */
    __ASM volatile(
        "dsmalxda %0, %1, %2"
        : "+r"(t)           /* %0: read-write operand */
        : "r"(a), "r"(b)    /* %1, %2: source operands */
    );
    return t;
}
/* ===== Inline Function End for DSMALXDA ===== */

/* ===== Inline Function Start for DSMALDS ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DSMALDS (Four Signed 16x16 with 64-bit Add and Sub)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DSMALDS Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do four signed 16x16 and add and subtraction signed multiplication results and a third register. The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
 * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
 * Rs1 with the top 16-bit content of Rs2.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 *
 * m0 = op1b.H[1] s* op2b.H[1];
 * m1 = op1b.H[0] s* op2b.H[0];
 * m2 = op1t.H[1] s* op2t.H[1];
 * m3 = op1t.H[0] s* op2t.H[0];
 *
 * Rd = Rd + m0 - m1 + m2 - m3;
 * x=0
 * ~~~
 *
 * \param [in]  t long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMALDS(long long t, unsigned long long a, unsigned long long b)
{
    /* Issue DSMALDS: t is both read and written, a and b are inputs only. */
    __ASM volatile(
        "dsmalds %0, %1, %2"
        : "+r"(t)           /* %0: read-write operand */
        : "r"(a), "r"(b)    /* %1, %2: source operands */
    );
    return t;
}
/* ===== Inline Function End for DSMALDS ===== */

/* ===== Inline Function Start for DSMALDRS ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief DSMALDRS (Four Signed 16x16 with 64-bit Add and Reversed Sub)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DSMALDRS Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do four signed 16x16 and add and reversed subtraction signed multiplication results and a third register. The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the top 16-bit content of Rs1 with the top 16-bit content
 * of Rs2 and then subtracts the result from the result of multiplying the bottom 16-bit content of Rs1
 * with the bottom 16-bit content of Rs2.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 *
 * m0 = op1b.H[0] s* op2b.H[0];
 * m1 = op1b.H[1] s* op2b.H[1];
 * m2 = op1t.H[0] s* op2t.H[0];
 * m3 = op1t.H[1] s* op2t.H[1];
 *
 * Rd = Rd + m0 - m1 + m2 - m3;
 * x=0
 * ~~~
 *
 * \param [in]  t long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMALDRS(long long t, unsigned long long a, unsigned long long b)
{
    /* Issue DSMALDRS: t is both read and written, a and b are inputs only. */
    __ASM volatile(
        "dsmaldrs %0, %1, %2"
        : "+r"(t)           /* %0: read-write operand */
        : "r"(a), "r"(b)    /* %1, %2: source operands */
    );
    return t;
}
/* ===== Inline Function End for DSMALDRS ===== */

/* ===== Inline Function Start for DSMALXDS ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief  DSMALXDS (Four Cross Signed 16x16 with 64-bit Add and Sub)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DSMALXDS Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do four cross signed 16x16 and add and subtraction signed multiplication results and a third register. The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the bottom 16-bit content of Rs1 with the top 16-bit
 * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
 * Rs1 with the bottom 16-bit content of Rs2.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 *
 * m0 = op1b.H[1] s* op2b.H[0];
 * m1 = op1b.H[0] s* op2b.H[1];
 * m2 = op1t.H[1] s* op2t.H[0];
 * m3 = op1t.H[0] s* op2t.H[1];
 *
 * Rd = Rd + m0 - m1 + m2 - m3;
 * x=0
 * ~~~
 *
 * \param [in]  t long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMALXDS(long long t, unsigned long long a, unsigned long long b)
{
    /* Issue DSMALXDS: t is both read and written, a and b are inputs only. */
    __ASM volatile(
        "dsmalxds %0, %1, %2"
        : "+r"(t)           /* %0: read-write operand */
        : "r"(a), "r"(b)    /* %1, %2: source operands */
    );
    return t;
}
/* ===== Inline Function End for DSMALXDS ===== */

/* ===== Inline Function Start for DSMSLDA ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief  DSMSLDA (Four Signed 16x16 with 64-bit Sub)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DSMSLDA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do four signed 16x16 and subtraction signed multiplication results and add a third register. The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
 * content Rs2 and multiplies the top 16-bit content of Rs1 with the top 16-bit content of Rs2.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 *
 * m0 = op1b.H[0] s* op2b.H[0];
 * m1 = op1b.H[1] s* op2b.H[1];
 * m2 = op1t.H[0] s* op2t.H[0];
 * m3 = op1t.H[1] s* op2t.H[1];
 *
 * Rd = Rd - m0 - m1 - m2 - m3;
 * x=0
 * ~~~
 *
 * \param [in]  t long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMSLDA(long long t, unsigned long long a, unsigned long long b)
{
    /* Issue DSMSLDA: t is both read and written, a and b are inputs only. */
    __ASM volatile(
        "dsmslda %0, %1, %2"
        : "+r"(t)           /* %0: read-write operand */
        : "r"(a), "r"(b)    /* %1, %2: source operands */
    );
    return t;
}
/* ===== Inline Function End for DSMSLDA ===== */

/* ===== Inline Function Start for DSMSLXDA ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief  DSMSLXDA (Four Cross Signed 16x16 with 64-bit Sub)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DSMSLXDA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do four cross signed 16x16 and subtraction signed multiplication results and add a third register. The results are written into Rd.
 *
 * **Description**:\n
 * It multiplies the top 16-bit content of Rs1 with the bottom 16-bit
 * content of Rs2 and multiplies the bottom 16-bit content of Rs1 with the top 16-bit content of Rs2.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 *
 * m0 = op1b.H[0] s* op2b.H[1];
 * m1 = op1b.H[1] s* op2b.H[0];
 * m2 = op1t.H[0] s* op2t.H[1];
 * m3 = op1t.H[1] s* op2t.H[0];
 *
 * Rd = Rd - m0 - m1 - m2 - m3;
 * x=0
 * ~~~
 *
 * \param [in]  t long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMSLXDA(long long t, unsigned long long a, unsigned long long b)
{
    /* Issue DSMSLXDA: t is both read and written, a and b are inputs only. */
    __ASM volatile(
        "dsmslxda %0, %1, %2"
        : "+r"(t)           /* %0: read-write operand */
        : "r"(a), "r"(b)    /* %1, %2: source operands */
    );
    return t;
}
/* ===== Inline Function End for DSMSLXDA ===== */

/* ===== Inline Function Start for DDSMAQA ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief  DDSMAQA (Eight Signed 8x8 with 64-bit Add)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DDSMAQA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do eight signed 8x8 and add signed multiplication results and a third register. The results are written into Rd.
 *
 * **Description**:\n
 * Do eight signed 8-bit multiplications from eight 8-bit chunks of two registers; and then adds
 * the eight 16-bit results and the content of 64-bit chunks of a third register.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 *
 * m0 = op1b.B[0] s* op2b.B[0];
 * m1 = op1b.B[1] s* op2b.B[1];
 * m2 = op1b.B[2] s* op2b.B[2];
 * m3 = op1b.B[3] s* op2b.B[3];
 * m4 = op1t.B[0] s* op2t.B[0];
 * m5 = op1t.B[1] s* op2t.B[1];
 * m6 = op1t.B[2] s* op2t.B[2];
 * m7 = op1t.B[3] s* op2t.B[3];
 *
 * s0 = m0 + m1 + m2 + m3;
 * s1 = m4 + m5 + m6 + m7;
 * Rd = Rd + s0 + s1;
 * x=0
 * ~~~
 *
 * \param [in]  t long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DDSMAQA(long long t, unsigned long long a, unsigned long long b)
{
    /* Issue DDSMAQA: t is both read and written, a and b are inputs only. */
    __ASM volatile(
        "ddsmaqa %0, %1, %2"
        : "+r"(t)           /* %0: read-write operand */
        : "r"(a), "r"(b)    /* %1, %2: source operands */
    );
    return t;
}
/* ===== Inline Function End for DDSMAQA ===== */

/* ===== Inline Function Start for DDSMAQA.SU ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief  DDSMAQA.SU (Eight Signed 8 x Unsigned 8 with 64-bit Add)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DDSMAQA.SU Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do eight signed 8 x unsigned 8 and add signed multiplication results and a third register. The results are written into Rd.
 *
 * **Description**:\n
 * Do eight signed 8-bit x unsigned 8-bit multiplications from eight 8-bit chunks of two registers; and then adds
 * the eight 16-bit results and the content of 64-bit chunks of a third register.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 *
 * m0 = op1b.B[0] su* op2b.B[0];
 * m1 = op1b.B[1] su* op2b.B[1];
 * m2 = op1b.B[2] su* op2b.B[2];
 * m3 = op1b.B[3] su* op2b.B[3];
 * m4 = op1t.B[0] su* op2t.B[0];
 * m5 = op1t.B[1] su* op2t.B[1];
 * m6 = op1t.B[2] su* op2t.B[2];
 * m7 = op1t.B[3] su* op2t.B[3];
 *
 * s0 = m0 + m1 + m2 + m3;
 * s1 = m4 + m5 + m6 + m7;
 * Rd = Rd + s0 + s1;
 * x=0
 * ~~~
 *
 * \param [in]  t long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DDSMAQA_SU(long long t, unsigned long long a, unsigned long long b)
{
    /* Issue DDSMAQA.SU: t is both read and written, a and b are inputs only. */
    __ASM volatile(
        "ddsmaqa.su %0, %1, %2"
        : "+r"(t)           /* %0: read-write operand */
        : "r"(a), "r"(b)    /* %1, %2: source operands */
    );
    return t;
}
/* ===== Inline Function End for DDSMAQA.SU ===== */

/* ===== Inline Function Start for DDUMAQA ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief  DDUMAQA (Eight Unsigned 8x8 with 64-bit Unsigned Add)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DDUMAQA Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do eight unsigned 8x8 and add unsigned multiplication results and a third register. The results are written into Rd.
 *
 * **Description**:\n
 * Do eight unsigned 8-bit multiplications from eight 8-bit chunks of two registers; and then adds
 * the eight 16-bit results and the content of 64-bit chunks of a third register.
 *
 * **Operations**:\n
 * ~~~
 * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
 * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
 *
 * m0 = op1b.B[0] u* op2b.B[0];
 * m1 = op1b.B[1] u* op2b.B[1];
 * m2 = op1b.B[2] u* op2b.B[2];
 * m3 = op1b.B[3] u* op2b.B[3];
 * m4 = op1t.B[0] u* op2t.B[0];
 * m5 = op1t.B[1] u* op2t.B[1];
 * m6 = op1t.B[2] u* op2t.B[2];
 * m7 = op1t.B[3] u* op2t.B[3];
 *
 * s0 = m0 + m1 + m2 + m3;
 * s1 = m4 + m5 + m6 + m7;
 * Rd = Rd + s0 + s1;
 * x=0
 * ~~~
 *
 * \param [in]  t long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DDUMAQA(long long t, unsigned long long a, unsigned long long b)
{
    /* Issue DDUMAQA: t is both read and written, a and b are inputs only. */
    __ASM volatile(
        "ddumaqa %0, %1, %2"
        : "+r"(t)           /* %0: read-write operand */
        : "r"(a), "r"(b)    /* %1, %2: source operands */
    );
    return t;
}
/* ===== Inline Function End for DDUMAQA ===== */

/* ===== Inline Function Start for DSMA32.u ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief    DSMA32.u (64-bit SIMD 32-bit Signed Multiply Addition With Rounding and Clip)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DSMA32.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 32x32 and add signed multiplication results with Rounding, then right shift 32-bit and clip q63 to q31.
 * The result is written to Rd.
 *
 * **Description**:\n
 * For the `DSMA32.u` instruction, multiply the top 32-bit Q31 content of 64-bit chunks in Rs1 with the top 32-bit Q31
 * content of 64-bit chunks in Rs2. At the same time, multiply the bottom 32-bit Q31 content of 64-bit chunks in Rs1 with
 * the bottom 32-bit Q31 content of 64-bit chunks in Rs2.
 * Then, do the addition for the results above and perform the additional rounding operations, and then move the data to the right
 * by 32-bit, and clip the 64-bit data into 32-bit. The result is written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd = (q31_t)((Rs1.W[x] s* Rs2.W[x] + Rs1.W[x + 1] s* Rs2.W[x + 1] + 0x80000000LL) s>> 32);
 * x=0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_DSMA32_U(unsigned long long a, unsigned long long b)
{
    long rd;    /* output-only operand %0, written by the instruction */

    __ASM volatile(
        "dsma32.u %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)    /* %1, %2: source operands */
    );
    return rd;
}
/* ===== Inline Function End for DSMA32.u ===== */

/* ===== Inline Function Start for DSMXS32.u ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief    DSMXS32.u (64-bit SIMD 32-bit Signed Multiply Cross Subtraction With Rounding and Clip)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DSMXS32.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two cross signed 32x32 and sub signed multiplication results with Rounding, then right shift 32-bit and clip q63 to
 * q31. The result is written to Rd.
 *
 * **Description**:\n
 * For the `DSMXS32.u` instruction, multiply the top 32-bit Q31 content of 64-bit chunks in Rs1 with the bottom 32-bit
 * Q31 content of 64-bit chunks in Rs2. At the same time, multiply the bottom 32-bit Q31 content of 64-bit chunks in Rs1
 * with the top 32-bit Q31 content of 64-bit chunks in Rs2.
 * Then, do the subtraction for the results above and perform the additional rounding operations, and then move the data to the right by
 * 32-bit, and clip the 64-bit data into 32-bit. The result is written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd = (q31_t)((Rs1.W[x + 1] s* Rs2.W[x] - Rs1.W[x] s* Rs2.W[x + 1] + 0x80000000LL) s>> 32);
 * x=0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_DSMXS32_U(unsigned long long a, unsigned long long b)
{
    long rd;    /* output-only operand %0, written by the instruction */

    __ASM volatile(
        "dsmxs32.u %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)    /* %1, %2: source operands */
    );
    return rd;
}
/* ===== Inline Function End for DSMXS32.u ===== */

/* ===== Inline Function Start for DSMXA32.u ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief    DSMXA32.u (64-bit SIMD 32-bit Signed Cross Multiply Addition with Rounding and Clip)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DSMXA32.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two cross signed 32x32 and add signed multiplication results with Rounding, then right shift 32-bit and clip q63 to
 * q31. The result is written to Rd.
 *
 * **Description**:\n
 * For the `DSMXA32.u` instruction,multiply the top 32-bit Q31 content of 64-bit chunks in Rs1 with the bottom 32-bit Q31
 * content of 64-bit chunks in Rs2. At the same time, multiply the bottom 32-bit Q31 content of 64-bit chunks in Rs1 with
 * the top 32-bit Q31 content of 64-bit chunks in Rs2.
 * Then, do the addition for the results above and perform the additional rounding operations, and then move the data to the right
 * by 32-bit, and clip the 64-bit data into 32-bit. The result is written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd = (q31_t)((Rs1.W[x + 1] s* Rs2.W[x] + Rs1.W[x] s* Rs2.W[x + 1] + 0x80000000LL) s>> 32);
 * x=0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_DSMXA32_U(unsigned long long a, unsigned long long b)
{
    long rd;    /* output-only operand %0, written by the instruction */

    __ASM volatile(
        "dsmxa32.u %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)    /* %1, %2: source operands */
    );
    return rd;
}
/* ===== Inline Function End for DSMXA32.u ===== */

/* ===== Inline Function Start for DSMS32.u ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief    DSMS32.u (64-bit SIMD 32-bit Signed Multiply Subtraction with Rounding and Clip)
 * \details
 * **Type**: DSP
 *
 * **Syntax**:\n
 * ~~~
 * DSMS32.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 32x32 and sub signed multiplication results with Rounding, then right shift 32-bit and clip q63 to q31. The
 * result is written to Rd.
 *
 * **Description**:\n
 * For the `DSMS32.u` instruction, multiply the bottom 32-bit Q31 content of 64-bit chunks in Rs1 with the bottom 32-bit
 * Q31 content of 64-bit chunks in Rs2. At the same time, multiply the top 32-bit Q31 content of 64-bit chunks in Rs1 with
 * the top 32-bit Q31 content of 64-bit chunks in Rs2.
 * Then, do the subtraction for the results above and perform the additional rounding operations, and then move the data to the right by
 * 32-bit, and clip the 64-bit data into 32-bit. The result is written to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Rd = (q31_t)((Rs1.W[x] s* Rs2.W[x] - Rs1.W[x + 1] s* Rs2.W[x + 1] + 0x80000000LL) s>> 32);
 * x=0
 * ~~~
 *
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_DSMS32_U(unsigned long long a, unsigned long long b)
{
    long rd;    /* output-only operand %0, written by the instruction */

    __ASM volatile(
        "dsms32.u %0, %1, %2"
        : "=r"(rd)
        : "r"(a), "r"(b)    /* %1, %2: source operands */
    );
    return rd;
}
/* ===== Inline Function End for DSMS32.u ===== */

/* ===== Inline Function Start for DSMADA16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief    DSMADA16 (Signed Multiply Two Halfs and Two Adds 32-bit)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSMADA16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications of two 32-bit registers; and then adds the 32-bit results and the 32-bit value of an
 * even/odd pair of registers together.
 * * DSMADA16: rt pair+ top*top + bottom*bottom
 *
 * **Description**:\n
 * This instruction multiplies the per 16-bit content of the 32-bit elements of Rs1 with the corresponding 16-bit content of
 * the 32-bit elements of Rs2. The result is added to the 32-bit value of an even/odd pair of registers specified by Rd(4,1).
 * The 32-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and Rs2, and the 32-bit value of the
 * register-pair are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
 * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
 * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
 * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
 * Rd.W = Rd.W + SE32(Mres0[0][31:0]) + SE32(Mres1[0][31:0]) + SE32(Mres0[1][31:0]) + SE32(Mres1[1][31:0]);
 * ~~~
 *
 * \param [in]  t long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_DSMADA16(long long t, unsigned long long a, unsigned long long b)
{
    /* Issue DSMADA16: t is both read and written, a and b are inputs only. */
    __ASM volatile(
        "dsmada16 %0, %1, %2"
        : "+r"(t)           /* %0: read-write operand */
        : "r"(a), "r"(b)    /* %1, %2: source operands */
    );
    /* The updated long long operand is converted to long for the caller. */
    return (long)t;
}
/* ===== Inline Function End for DSMADA16 ===== */

/* ===== Inline Function Start for DSMAXDA16 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief    DSMAXDA16 (Signed Crossed Multiply Two Halfs and Two Adds 32-bit)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSMAXDA16 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two signed 16-bit multiplications of two 32-bit registers; and then adds the 32-bit results and the 32-bit value of an
 * even/odd pair of registers together.
 * * DSMAXDA16: rt pair+ top*bottom + bottom*top (all 32-bit elements)
 *
 * **Description**:\n
 * This instruction crossly multiplies the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of the 32-bit
 * elements of Rs2 and then adds the result to the result of multiplying the bottom 16-bit content of the 32-bit elements of
 * Rs1 with the top 16-bit content of the 32-bit elements of Rs2 with unlimited precision. The result is added to the 64-bit
 * value of an even/odd pair of registers specified by Rd(4,1).The 64-bit addition result is clipped to 32-bit result.
 *
 * **Operations**:\n
 * ~~~
 * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
 * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]);
 * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
 * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]);
 * Rd.W = Rd.W + SE32(Mres0[0][31:0]) + SE32(Mres1[0][31:0]) + SE32(Mres0[1][31:0]) + SE32(Mres1[1][31:0]);
 * ~~~
 *
 * \param [in]  t long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_DSMAXDA16(long long t, unsigned long long a, unsigned long long b)
{
    /* acc is both input and output of the instruction ("+r" constraint). */
    long long acc = t;

    __ASM volatile("dsmaxda16 %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));

    /* The caller gets the accumulator converted to long. */
    return (long)acc;
}
/* ===== Inline Function End for DSMAXDA16 ===== */

/* ===== Inline Function Start for DKSMS32.u ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief    DKSMS32.u (Two Signed Multiply Shift-clip and Saturation with Rounding)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKSMS32.u Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Computes saturated multiplication of two pairs of q31 type with shifted rounding.
 *
 * **Description**:\n
 * Compute the multiplication of Rs1 and Rs2 of type q31_t, intercept [47:16] for the resulting 64-bit product
 * to get the 32-bit number, then add 1 to it to do rounding, and finally saturate the result after rounding.
 *
 * **Operations**:\n
 * ~~~
 * Mres[x][63:0] = Rs1.W[x] s* Rs2.W[x];
 * Round[x][32:0] = Mres[x][47:15] + 1;
 * Rd.W[x] = sat.31(Rd.W[x] + Round[x][32:1]);
 * x=1...0
 * ~~~
 *
 * \param [in]  t unsigned long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in unsigned long long type
 */
__STATIC_FORCEINLINE unsigned long long __RV_DKSMS32_U(unsigned long long t, unsigned long long a, unsigned long long b)
{
    /* acc is both input and output of the instruction ("+r" constraint);
     * a and b are read-only register operands. */
    unsigned long long acc = t;

    __ASM volatile("dksms32.u %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));

    return acc;
}
/* ===== Inline Function End for DKSMS32.u ===== */

/* ===== Inline Function Start for DMADA32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief    DMADA32 (Two Cross Signed 32x32 with 64-bit Add and Clip to 32-bit)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DMADA32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Do two cross signed 32x32 multiplications and add the signed multiplication results to q63, then clip the q63 result
 * to q31; the final results are written into Rd.
 *
 * **Description**:\n
 * For the `DMADA32` instruction, it multiplies the top 32-bit element in Rs1 with the bottom 32-bit element in Rs2 and
 * then adds the result to the result of multiplying the bottom 32-bit element in Rs1 with the top 32-bit element in Rs2, then
 * clip the q63 result to q31.
 *
 * **Operations**:\n
 * ~~~
 * res = (q31_t)((((q63_t) Rd.w[0] << 32) + (q63_t)Rs1.w[0] s*  Rs2.w[1] + (q63_t)Rs1.w[1] s*  Rs2.w[0]) s>> 32);
 * rd = res;
 * ~~~
 *
 * \param [in]  t long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long type
 */
__STATIC_FORCEINLINE long __RV_DMADA32(long long t, unsigned long long a, unsigned long long b)
{
    /* acc is both input and output of the instruction ("+r" constraint). */
    long long acc = t;

    __ASM volatile("dmada32 %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));

    /* The caller gets the accumulator converted to long. */
    return (long)acc;
}
/* ===== Inline Function End for DMADA32 ===== */

/* ===== Inline Function Start for DSMALBB ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief    DSMALBB (Signed Multiply Bottom Halfs & Add 64-bit)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSMALBB Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 16-bit content of the 32-bit elements of a register with the 16-bit content of the corresponding 32-bit
 * elements of another register and add the results with a 64-bit value of an even/odd pair of registers. The addition result 
 * is written back to the register-pair.
 * * DSMALBB: rt pair + bottom*bottom (all 32-bit elements)
 *
 * **Description**:\n
 * For the `DSMALBB` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit content of Rs2. The
 * multiplication results are added with the 64-bit value of Rd. The 64-bit addition result is written back to Rd.
 *
 * **Operations**:\n
 * ~~~
 * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[0];
 * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[0];
 * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
 * ~~~
 *
 * \param [in]  t long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMALBB(long long t, unsigned long long a, unsigned long long b)
{
    /* acc is both input and output of the instruction ("+r" constraint);
     * a and b are read-only register operands. */
    long long acc = t;

    __ASM volatile("dsmalbb %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));

    return acc;
}
/* ===== Inline Function End for DSMALBB ===== */

/* ===== Inline Function Start for DSMALBT ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief    DSMALBT (Signed Multiply Bottom Half & Top Half & Add 64-bit)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSMALBT Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 16-bit content of the 32-bit elements of a register with the 16-bit content of the corresponding 32-bit
 * elements of another register and add the results with a 64-bit value of an even/odd pair of registers. The addition result
 * is written back to the register-pair.
 * * DSMALBT: rt pair + bottom*top (all 32-bit elements)
 *
 * **Description**:\n
 * For the `DSMALBT` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with the top 16-bit
 * content of the 32-bit elements of Rs2.
 * The multiplication results are added with the 64-bit value of Rd. The 64-bit addition result is written back to Rd. The
 * 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[1];
 * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[1];
 * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
 * ~~~
 *
 * \param [in]  t long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMALBT(long long t, unsigned long long a, unsigned long long b)
{
    /* acc is both input and output of the instruction ("+r" constraint);
     * a and b are read-only register operands. */
    long long acc = t;

    __ASM volatile("dsmalbt %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));

    return acc;
}
/* ===== Inline Function End for DSMALBT ===== */

/* ===== Inline Function Start for DSMALTT ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief    DSMALTT (Signed Multiply Top Half & Add 64-bit)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DSMALTT Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 16-bit content of the 32-bit elements of a register with the 16-bit content of the corresponding 32-bit
 * elements of another register and add the results with a 64-bit value of an even/odd pair of registers. The addition result
 * is written back to the register-pair.
 * * DSMALTT: rt pair + top*top (all 32-bit elements)
 *
 * **Description**:\n
 * For the `DSMALTT` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit
 * content of the 32-bit elements of Rs2.
 * The multiplication results are added with the 64-bit value of Rd. The 64-bit addition result is written back to Rd. The
 * 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * Mres[0][31:0] = Rs1.W[0].H[1] * Rs2.W[0].H[1];
 * Mres[1][31:0] = Rs1.W[1].H[1] * Rs2.W[1].H[1];
 * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
 * ~~~
 *
 * \param [in]  t long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DSMALTT(long long t, unsigned long long a, unsigned long long b)
{
    /* acc is both input and output of the instruction ("+r" constraint);
     * a and b are read-only register operands. */
    long long acc = t;

    __ASM volatile("dsmaltt %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));

    return acc;
}
/* ===== Inline Function End for DSMALTT ===== */

/* ===== Inline Function Start for DKMABB32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief    DKMABB32 (Saturating Signed Multiply Bottom Words & Add)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKMABB32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit element in a register with the 32-bit element in another register and add the result to the content
 * of 64-bit data in the third register. The addition result may be saturated and is written to the third register.
 * * DKMABB32: rd + bottom*bottom
 *
 * **Description**:\n
 * For the `DKMABB32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit element in Rs2.
 * The multiplication result is added to the content of 64-bit data in Rd. If the addition result is beyond the Q63 number range
 * (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The result after saturation is written to Rd.
 * The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * res = Rd + (Rs1.W[0] * Rs2.W[0]);
 * if (res > (2^63)-1) {
 *   res = (2^63)-1;
 *   OV = 1;
 * } else if (res < -2^63) {
 *   res = -2^63;
 *   OV = 1;
 * }
 * Rd = res;
 * ~~~
 *
 * \param [in]  t long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DKMABB32(long long t, unsigned long long a, unsigned long long b)
{
    /* acc is both input and output of the instruction ("+r" constraint);
     * a and b are read-only register operands. */
    long long acc = t;

    __ASM volatile("dkmabb32 %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));

    return acc;
}
/* ===== Inline Function End for DKMABB32 ===== */

/* ===== Inline Function Start for DKMABT32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief    DKMABT32 (Saturating Signed Multiply Bottom & Top Words & Add)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKMABT32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit element in a register with the 32-bit element in another register and add the result to the content
 * of 64-bit data in the third register. The addition result may be saturated and is written to the third register.
 * * DKMABT32: rd + bottom*top
 *
 * **Description**:\n
 * For the `DKMABT32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit element in Rs2.
 * The multiplication result is added to the content of 64-bit data in Rd. If the addition result is beyond the Q63 number range
 * (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The result after saturation is written to Rd.
 * The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * res = Rd + (Rs1.W[0] * Rs2.W[1]);
 * if (res > (2^63)-1) {
 *   res = (2^63)-1;
 *   OV = 1;
 * } else if (res < -2^63) {
 *   res = -2^63;
 *   OV = 1;
 * }
 * Rd = res;
 * ~~~
 *
 * \param [in]  t long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DKMABT32(long long t, unsigned long long a, unsigned long long b)
{
    /* acc is both input and output of the instruction ("+r" constraint);
     * a and b are read-only register operands. */
    long long acc = t;

    __ASM volatile("dkmabt32 %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));

    return acc;
}
/* ===== Inline Function End for DKMABT32 ===== */

/* ===== Inline Function Start for DKMATT32 ===== */
/**
 * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
 * \brief    DKMATT32 (Saturating Signed Multiply Top Words & Add)
 * \details
 * **Type**: SIMD
 *
 * **Syntax**:\n
 * ~~~
 * DKMATT32 Rd, Rs1, Rs2
 * ~~~
 *
 * **Purpose**:\n
 * Multiply the signed 32-bit element in a register with the 32-bit element in another register and add the result to the content
 * of 64-bit data in the third register. The addition result may be saturated and is written to the third register.
 * * DKMATT32: rd + top*top
 *
 * **Description**:\n
 * For the `DKMATT32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit element in Rs2.
 * The multiplication result is added to the content of 64-bit data in Rd. If the addition result is beyond the Q63 number range
 * (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The result after saturation is written to Rd.
 * The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
 *
 * **Operations**:\n
 * ~~~
 * res = Rd + (Rs1.W[1] * Rs2.W[1]);
 * if (res > (2^63)-1) {
 *   res = (2^63)-1;
 *   OV = 1;
 * } else if (res < -2^63) {
 *   res = -2^63;
 *   OV = 1;
 * }
 * Rd = res;
 * ~~~
 *
 * \param [in]  t long long type of value stored in t
 * \param [in]  a unsigned long long type of value stored in a
 * \param [in]  b unsigned long long type of value stored in b
 * \return value stored in long long type
 */
__STATIC_FORCEINLINE long long __RV_DKMATT32(long long t, unsigned long long a, unsigned long long b)
{
    /* acc is both input and output of the instruction ("+r" constraint);
     * a and b are read-only register operands. */
    long long acc = t;

    __ASM volatile("dkmatt32 %0, %1, %2"
                   : "+r"(acc)
                   : "r"(a), "r"(b));

    return acc;
}
/* ===== Inline Function End for DKMATT32 ===== */
#endif /* __RISCV_XLEN == 32 */

#elif defined (__ICCRISCV__)

#if __riscv_xlen == 32
#include "iar_nds32_intrinsic.h"
#elif __riscv_xlen == 64
#include "iar_nds64_intrinsic.h"
#else
#error "Unexpected RISC-V XLEN size."
#endif /* __riscv_xlen == 32 */

#pragma language=save
#pragma language=extended

// Redefine those compatible instruction name supplied by IAR
#define __RV_CLROV              __nds__clrov
#define __RV_RDOV               __nds__rdov
#define __RV_ADD8               __nds__add8
#define __RV_SUB8               __nds__sub8
#define __RV_ADD16              __nds__add16
#define __RV_SUB16              __nds__sub16
#define __RV_ADD64              __nds__add64
#define __RV_SUB64              __nds__sub64
#define __RV_RADD8              __nds__radd8
#define __RV_RSUB8              __nds__rsub8
#define __RV_RADD16             __nds__radd16
#define __RV_RSUB16             __nds__rsub16
#define __RV_RADD64             __nds__radd64
#define __RV_RSUB64             __nds__rsub64
#define __RV_RADDW              __nds__raddw
#define __RV_RSUBW              __nds__rsubw
#define __RV_URADD8             __nds__uradd8
#define __RV_URSUB8             __nds__ursub8
#define __RV_URADD16            __nds__uradd16
#define __RV_URSUB16            __nds__ursub16
#define __RV_URADD64            __nds__uradd64
#define __RV_URSUB64            __nds__ursub64
#define __RV_URADDW             __nds__uraddw
#define __RV_URSUBW             __nds__ursubw
#define __RV_KADD8              __nds__kadd8
#define __RV_KSUB8              __nds__ksub8
#define __RV_KADD16             __nds__kadd16
#define __RV_KSUB16             __nds__ksub16
#define __RV_KADD64             __nds__kadd64
#define __RV_KSUB64             __nds__ksub64
#define __RV_KADDH              __nds__kaddh
#define __RV_KSUBH              __nds__ksubh
#define __RV_KADDW              __nds__kaddw
#define __RV_KSUBW              __nds__ksubw
#define __RV_UKADD8             __nds__ukadd8
#define __RV_UKSUB8             __nds__uksub8
#define __RV_UKADD16            __nds__ukadd16
#define __RV_UKSUB16            __nds__uksub16
#define __RV_UKADD64            __nds__ukadd64
#define __RV_UKSUB64            __nds__uksub64
#define __RV_UKADDH             __nds__ukaddh
#define __RV_UKSUBH             __nds__uksubh
#define __RV_UKADDW             __nds__ukaddw
#define __RV_UKSUBW             __nds__uksubw
#define __RV_CRAS16             __nds__cras16
#define __RV_CRSA16             __nds__crsa16
#define __RV_RCRAS16            __nds__rcras16
#define __RV_RCRSA16            __nds__rcrsa16
#define __RV_URCRAS16           __nds__urcras16
#define __RV_URCRSA16           __nds__urcrsa16
#define __RV_KCRAS16            __nds__kcras16
#define __RV_KCRSA16            __nds__kcrsa16
#define __RV_UKCRAS16           __nds__ukcras16
#define __RV_UKCRSA16           __nds__ukcrsa16
#define __RV_SRA8               __nds__sra8
#define __RV_SRAI8              __nds__sra8
#define __RV_SRA16              __nds__sra16
#define __RV_SRAI16             __nds__sra16
#define __RV_SRL8               __nds__srl8
#define __RV_SRL16              __nds__srl16
#define __RV_SLL8               __nds__sll8
#define __RV_SLL16              __nds__sll16
#define __RV_SRA_U              __nds__sra_u
#define __RV_SRA8_U             __nds__sra8_u
#define __RV_SRA16_U            __nds__sra16_u
#define __RV_SRL8_U             __nds__srl8_u
#define __RV_SRL16_U            __nds__srl16_u
#define __RV_KSLL8              __nds__ksll8
#define __RV_KSLL16             __nds__ksll16
#define __RV_KSLLW              __nds__ksllw
#define __RV_KSLRA8             __nds__kslra8
#define __RV_KSLRA8_U           __nds__kslra8_u
#define __RV_KSLRA16            __nds__kslra16
#define __RV_KSLRA16_U          __nds__kslra16_u
#define __RV_KSLRAW             __nds__kslraw
#define __RV_KSLRAW_U           __nds__kslraw_u
#define __RV_CMPEQ8             __nds__cmpeq8
#define __RV_CMPEQ16            __nds__cmpeq16
#define __RV_SCMPLE8            __nds__scmple8
#define __RV_SCMPLE16           __nds__scmple16
#define __RV_SCMPLT8            __nds__scmplt8
#define __RV_SCMPLT16           __nds__scmplt16
#define __RV_UCMPLE8            __nds__ucmple8
#define __RV_UCMPLE16           __nds__ucmple16
#define __RV_UCMPLT8            __nds__ucmplt8
#define __RV_UCMPLT16           __nds__ucmplt16
#define __RV_SMUL8              __nds__smul8
#define __RV_UMUL8              __nds__umul8
#define __RV_SMUL16             __nds__smul16
#define __RV_UMUL16             __nds__umul16
#define __RV_SMULX8             __nds__smulx8
#define __RV_UMULX8             __nds__umulx8
#define __RV_SMULX16            __nds__smulx16
#define __RV_UMULX16            __nds__umulx16
#define __RV_KHM8               __nds__khm8
#define __RV_KHMX8              __nds__khmx8
#define __RV_KHM16              __nds__khm16
#define __RV_KHMX16             __nds__khmx16
#define __RV_MULR64             __nds__mulr64
#define __RV_MULSR64            __nds__mulsr64
#define __RV_SMMUL              __nds__smmul
#define __RV_SMMUL_U            __nds__smmul_u
#define __RV_WEXT               __nds__wext
#define __RV_SUNPKD810          __nds__sunpkd810
#define __RV_SUNPKD820          __nds__sunpkd820
#define __RV_SUNPKD830          __nds__sunpkd830
#define __RV_SUNPKD831          __nds__sunpkd831
#define __RV_SUNPKD832          __nds__sunpkd832
#define __RV_ZUNPKD810          __nds__zunpkd810
#define __RV_ZUNPKD820          __nds__zunpkd820
#define __RV_ZUNPKD830          __nds__zunpkd830
#define __RV_ZUNPKD831          __nds__zunpkd831
#define __RV_ZUNPKD832          __nds__zunpkd832
#define __RV_PKBB16             __nds__pkbb16
#define __RV_PKBT16             __nds__pkbt16
#define __RV_PKTT16             __nds__pktt16
#define __RV_PKTB16             __nds__pktb16
#define __RV_KMMAC              __nds__kmmac
#define __RV_KMMAC_U            __nds__kmmac_u
#define __RV_KMMSB              __nds__kmmsb
#define __RV_KMMSB_U            __nds__kmmsb_u
#define __RV_KWMMUL             __nds__kwmmul
#define __RV_KWMMUL_U           __nds__kwmmul_u
#define __RV_SMMWB              __nds__smmwb
#define __RV_SMMWB_U            __nds__smmwb_u
#define __RV_SMMWT              __nds__smmwt
#define __RV_SMMWT_U            __nds__smmwt_u
#define __RV_KMMAWB             __nds__kmmawb
#define __RV_KMMAWB_U           __nds__kmmawb_u
#define __RV_KMMAWT             __nds__kmmawt
#define __RV_KMMAWT_U           __nds__kmmawt_u
#define __RV_KMMWB2             __nds__kmmwb2
#define __RV_KMMWB2_U           __nds__kmmwb2_u
#define __RV_KMMWT2             __nds__kmmwt2
#define __RV_KMMWT2_U           __nds__kmmwt2_u
#define __RV_KMMAWB2            __nds__kmmawb2
#define __RV_KMMAWB2_U          __nds__kmmawb2_u
#define __RV_KMMAWT2            __nds__kmmawt2
#define __RV_KMMAWT2_U          __nds__kmmawt2_u
#define __RV_SMBB16             __nds__smbb16
#define __RV_SMBT16             __nds__smbt16
#define __RV_SMTT16             __nds__smtt16
#define __RV_KMDA               __nds__kmda
#define __RV_KMXDA              __nds__kmxda
#define __RV_SMDS               __nds__smds
#define __RV_SMDRS              __nds__smdrs
#define __RV_SMXDS              __nds__smxds
#define __RV_KMABB              __nds__kmabb
#define __RV_KMABT              __nds__kmabt
#define __RV_KMATT              __nds__kmatt
#define __RV_KMADA              __nds__kmada
#define __RV_KMAXDA             __nds__kmaxda
#define __RV_KMADS              __nds__kmads
#define __RV_KMADRS             __nds__kmadrs
#define __RV_KMAXDS             __nds__kmaxds
#define __RV_KMSDA              __nds__kmsda
#define __RV_KMSXDA             __nds__kmsxda
#define __RV_SMAL               __nds__smal
#define __RV_SMAQA              __nds__smaqa
#define __RV_UMAQA              __nds__umaqa
#define __RV_SMAQA_SU           __nds__smaqa_su
#define __RV_SMAR64             __nds__smar64
#define __RV_SMSR64             __nds__smsr64
#define __RV_UMAR64             __nds__umar64
#define __RV_UMSR64             __nds__umsr64
#define __RV_KMAR64             __nds__kmar64
#define __RV_KMSR64             __nds__kmsr64
#define __RV_UKMAR64            __nds__ukmar64
#define __RV_UKMSR64            __nds__ukmsr64
#define __RV_SMALBB             __nds__smalbb
#define __RV_SMALBT             __nds__smalbt
#define __RV_SMALTT             __nds__smaltt
#define __RV_SMALDA             __nds__smalda
#define __RV_SMALXDA            __nds__smalxda
#define __RV_SMALDS             __nds__smalds
#define __RV_SMALDRS            __nds__smaldrs
#define __RV_SMALXDS            __nds__smalxds
#define __RV_SMSLDA             __nds__smslda
#define __RV_SMSLXDA            __nds__smslxda
#define __RV_MINW               __nds__minw
#define __RV_MAXW               __nds__maxw
#define __RV_SMIN8              __nds__smin8
#define __RV_SMAX8              __nds__smax8
#define __RV_SMIN16             __nds__smin16
#define __RV_SMAX16             __nds__smax16
#define __RV_UMIN8              __nds__umin8
#define __RV_UMAX8              __nds__umax8
#define __RV_UMIN16             __nds__umin16
#define __RV_UMAX16             __nds__umax16
#define __RV_KABS8              __nds__kabs8
#define __RV_KABS16             __nds__kabs16
#define __RV_KABSW              __nds__kabsw
#define __RV_SCLIP8             __nds__sclip8
#define __RV_SCLIP16            __nds__sclip16
#define __RV_SCLIP32            __nds__sclip32
#define __RV_UCLIP8             __nds__uclip8
#define __RV_UCLIP16            __nds__uclip16
#define __RV_UCLIP32            __nds__uclip32
#define __RV_CLO8               __nds__clo8
#define __RV_CLO16              __nds__clo16
#define __RV_CLO32              __nds__clo32
#define __RV_CLZ8               __nds__clz8
#define __RV_CLZ16              __nds__clz16
#define __RV_CLZ32              __nds__clz32
#define __RV_CLRS8              __nds__clrs8
#define __RV_CLRS16             __nds__clrs16
#define __RV_CLRS32             __nds__clrs32
#define __RV_SWAP8              __nds__swap8
#define __RV_SWAP16             __nds__swap16
#define __RV_KHMBB              __nds__khmbb
#define __RV_KHMBT              __nds__khmbt
#define __RV_KHMTT              __nds__khmtt
#define __RV_KDMBB              __nds__kdmbb
#define __RV_KDMBT              __nds__kdmbt
#define __RV_KDMTT              __nds__kdmtt
#define __RV_KDMABB             __nds__kdmabb
#define __RV_KDMABT             __nds__kdmabt
#define __RV_KDMATT             __nds__kdmatt
#define __RV_MADDR32            __nds__maddr32
#define __RV_MSUBR32            __nds__msubr32
#define __RV_PBSAD              __nds__pbsad
#define __RV_PBSADA             __nds__pbsada
#define __RV_AVE                __nds__ave
#define __RV_BITREV             __nds__bitrev
#define __RV_INSB               __nds__insb

// Additional __RV_* -> __nds__* aliases that are only defined when
// __riscv_xlen is 64.
#if (__riscv_xlen == 64)
#define __RV_ADD32              __nds__add32
#define __RV_SUB32              __nds__sub32
#define __RV_RADD32             __nds__radd32
#define __RV_RSUB32             __nds__rsub32
#define __RV_URADD32            __nds__uradd32
#define __RV_URSUB32            __nds__ursub32
#define __RV_KADD32             __nds__kadd32
#define __RV_KSUB32             __nds__ksub32
#define __RV_UKADD32            __nds__ukadd32
#define __RV_UKSUB32            __nds__uksub32
#define __RV_CRAS32             __nds__cras32
#define __RV_CRSA32             __nds__crsa32
#define __RV_RCRAS32            __nds__rcras32
#define __RV_RCRSA32            __nds__rcrsa32
#define __RV_URCRAS32           __nds__urcras32
#define __RV_URCRSA32           __nds__urcrsa32
#define __RV_KCRAS32            __nds__kcras32
#define __RV_KCRSA32            __nds__kcrsa32
#define __RV_UKCRAS32           __nds__ukcras32
#define __RV_UKCRSA32           __nds__ukcrsa32
// __RV_SRAI32 and __RV_SLLI32 share the target of __RV_SRA32 / __RV_SLL32:
// no separate immediate-form __nds__ name is used here.
#define __RV_SRA32              __nds__sra32
#define __RV_SRAI32             __nds__sra32
#define __RV_SRL32              __nds__srl32
#define __RV_SLL32              __nds__sll32
#define __RV_SLLI32             __nds__sll32
#define __RV_SRAW_U             __nds__sraw_u
#define __RV_SRA32_U            __nds__sra32_u
#define __RV_SRL32_U            __nds__srl32_u
#define __RV_KSLL32             __nds__ksll32
#define __RV_KSLRA32            __nds__kslra32
#define __RV_KSLRA32_U          __nds__kslra32_u
#define __RV_SMBB32             __nds__smbb32
#define __RV_SMBT32             __nds__smbt32
#define __RV_SMTT32             __nds__smtt32
#define __RV_PKBB32             __nds__pkbb32
#define __RV_PKBT32             __nds__pkbt32
#define __RV_PKTT32             __nds__pktt32
#define __RV_PKTB32             __nds__pktb32
#define __RV_SMIN32             __nds__smin32
#define __RV_SMAX32             __nds__smax32
#define __RV_UMIN32             __nds__umin32
#define __RV_UMAX32             __nds__umax32
#define __RV_KABS32             __nds__kabs32
#define __RV_KHMBB16            __nds__khmbb16
#define __RV_KHMBT16            __nds__khmbt16
#define __RV_KHMTT16            __nds__khmtt16
#define __RV_KDMBB16            __nds__kdmbb16
#define __RV_KDMBT16            __nds__kdmbt16
#define __RV_KDMTT16            __nds__kdmtt16
#define __RV_KDMABB16           __nds__kdmabb16
#define __RV_KDMABT16           __nds__kdmabt16
#define __RV_KDMATT16           __nds__kdmatt16
#define __RV_KMABB32            __nds__kmabb32
#define __RV_KMABT32            __nds__kmabt32
#define __RV_KMATT32            __nds__kmatt32
#define __RV_KMDA32             __nds__kmda32
#define __RV_KMXDA32            __nds__kmxda32
#define __RV_KMADA32            __nds__kmada32
#define __RV_KMAXDA32           __nds__kmaxda32
#define __RV_KMADS32            __nds__kmads32
#define __RV_KMADRS32           __nds__kmadrs32
#define __RV_KMAXDS32           __nds__kmaxds32
#define __RV_KMSDA32            __nds__kmsda32
#define __RV_KMSXDA32           __nds__kmsxda32
#define __RV_SMDS32             __nds__smds32
#define __RV_SMDRS32            __nds__smdrs32
#define __RV_SMXDS32            __nds__smxds32
#endif /* __riscv_xlen == 64 */

// For now, the P-extension version supported by the IAR IDE is 0.5.0, while Nuclei supports 0.5.4,
// so Nuclei supplies a workaround that adds the custom instructions not natively
// supported by the IAR assembler. Note that __RV_BPICK remains to be implemented in the future.
// Only the Xxldsp Nuclei custom instruction set is implemented here: bpick is not implemented, and
// the expdxx functions are implemented in C rather than via a .insn variant.

#pragma inline=forced_no_body
unsigned long __RV_STAS16(unsigned long a, unsigned long b) {
    unsigned long result;

    /* Hand-encoded R-type .insn: result is the output operand (%0),
     * a and b are the input register operands (%1, %2). */
    __asm(".insn r 0x7F, 0x2, 0x7A,   %0,%1,%2"
          : "=r"(result)
          : "r"(a), "r"(b));
    return result;
}

#pragma inline=forced_no_body
unsigned long __RV_RSTAS16(unsigned long a, unsigned long b) {
    unsigned long result;

    /* Hand-encoded R-type .insn: result is the output operand (%0),
     * a and b are the input register operands (%1, %2). */
    __asm(".insn r 0x7F, 0x2, 0x5A,   %0,%1,%2"
          : "=r"(result)
          : "r"(a), "r"(b));
    return result;
}

#pragma inline=forced_no_body
unsigned long __RV_KSTAS16(unsigned long a, unsigned long b) {
    unsigned long result;

    /* Hand-encoded R-type .insn: result is the output operand (%0),
     * a and b are the input register operands (%1, %2). */
    __asm(".insn r 0x7F, 0x2, 0x62,   %0,%1,%2"
          : "=r"(result)
          : "r"(a), "r"(b));
    return result;
}

#pragma inline=forced_no_body
unsigned long __RV_URSTAS16(unsigned long a, unsigned long b) {
    unsigned long result;

    /* Hand-encoded R-type .insn: result is the output operand (%0),
     * a and b are the input register operands (%1, %2). */
    __asm(".insn r 0x7F, 0x2, 0x6A,   %0,%1,%2"
          : "=r"(result)
          : "r"(a), "r"(b));
    return result;
}

#pragma inline=forced_no_body
unsigned long __RV_UKSTAS16(unsigned long a, unsigned long b) {
    unsigned long result;

    /* Hand-encoded R-type .insn: result is the output operand (%0),
     * a and b are the input register operands (%1, %2). */
    __asm(".insn r 0x7F, 0x2, 0x72,   %0,%1,%2"
          : "=r"(result)
          : "r"(a), "r"(b));
    return result;
}

#pragma inline=forced_no_body
unsigned long __RV_STSA16(unsigned long a, unsigned long b) {
    unsigned long result;

    /* Hand-encoded R-type .insn: result is the output operand (%0),
     * a and b are the input register operands (%1, %2). */
    __asm(".insn r 0x7F, 0x2, 0x7B,   %0,%1,%2"
          : "=r"(result)
          : "r"(a), "r"(b));
    return result;
}

#pragma inline=forced_no_body
unsigned long __RV_RSTSA16(unsigned long a, unsigned long b) {
    unsigned long result;

    /* Hand-encoded R-type .insn: result is the output operand (%0),
     * a and b are the input register operands (%1, %2). */
    __asm(".insn r 0x7F, 0x2, 0x5B,   %0,%1,%2"
          : "=r"(result)
          : "r"(a), "r"(b));
    return result;
}

#pragma inline=forced_no_body
unsigned long __RV_KSTSA16(unsigned long a, unsigned long b) {
    unsigned long result;

    /* Hand-encoded R-type .insn: result is the output operand (%0),
     * a and b are the input register operands (%1, %2). */
    __asm(".insn r 0x7F, 0x2, 0x63,   %0,%1,%2"
          : "=r"(result)
          : "r"(a), "r"(b));
    return result;
}

#pragma inline=forced_no_body
unsigned long __RV_URSTSA16(unsigned long a, unsigned long b) {
    unsigned long result;

    /* Hand-encoded R-type .insn: result is the output operand (%0),
     * a and b are the input register operands (%1, %2). */
    __asm(".insn r 0x7F, 0x2, 0x6B,   %0,%1,%2"
          : "=r"(result)
          : "r"(a), "r"(b));
    return result;
}

#pragma inline=forced_no_body
unsigned long __RV_UKSTSA16(unsigned long a, unsigned long b) {
    unsigned long result;

    /* Hand-encoded R-type .insn: result is the output operand (%0),
     * a and b are the input register operands (%1, %2). */
    __asm(".insn r 0x7F, 0x2, 0x73,   %0,%1,%2"
          : "=r"(result)
          : "r"(a), "r"(b));
    return result;
}

// #pragma inline=forced_no_body
// unsigned long __RV_BPICK(unsigned long a, unsigned long b, unsigned long c) {
    // TODO: remains to be done
// }

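// RV64 only.
// NOTE(review): unlike the __RV_EXPD84..87 functions further down, the functions
// in this group are not wrapped in an XLEN == 64 preprocessor guard; confirm
// whether that is intentional.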
#pragma inline=forced_no_body
unsigned long __RV_STAS32(unsigned long a, unsigned long b) {
    unsigned long result;

    /* Hand-encoded R-type .insn: result is the output operand (%0),
     * a and b are the input register operands (%1, %2). */
    __asm(".insn r 0x7F, 0x2, 0x78,   %0,%1,%2"
          : "=r"(result)
          : "r"(a), "r"(b));
    return result;
}

#pragma inline=forced_no_body
unsigned long __RV_RSTAS32(unsigned long a, unsigned long b) {
    unsigned long result;

    /* Hand-encoded R-type .insn: result is the output operand (%0),
     * a and b are the input register operands (%1, %2). */
    __asm(".insn r 0x7F, 0x2, 0x58,   %0,%1,%2"
          : "=r"(result)
          : "r"(a), "r"(b));
    return result;
}

#pragma inline=forced_no_body
unsigned long __RV_KSTAS32(unsigned long a, unsigned long b) {
    unsigned long result;

    /* Hand-encoded R-type .insn: result is the output operand (%0),
     * a and b are the input register operands (%1, %2). */
    __asm(".insn r 0x7F, 0x2, 0x60,   %0,%1,%2"
          : "=r"(result)
          : "r"(a), "r"(b));
    return result;
}

#pragma inline=forced_no_body
unsigned long __RV_URSTAS32(unsigned long a, unsigned long b) {
    unsigned long result;

    /* Hand-encoded R-type .insn: result is the output operand (%0),
     * a and b are the input register operands (%1, %2). */
    __asm(".insn r 0x7F, 0x2, 0x68,   %0,%1,%2"
          : "=r"(result)
          : "r"(a), "r"(b));
    return result;
}

#pragma inline=forced_no_body
unsigned long __RV_UKSTAS32(unsigned long a, unsigned long b) {
    unsigned long result;

    /* Hand-encoded R-type .insn: result is the output operand (%0),
     * a and b are the input register operands (%1, %2). */
    __asm(".insn r 0x7F, 0x2, 0x70,   %0,%1,%2"
          : "=r"(result)
          : "r"(a), "r"(b));
    return result;
}

#pragma inline=forced_no_body
unsigned long __RV_STSA32(unsigned long a, unsigned long b) {
    unsigned long result;

    /* Hand-encoded R-type .insn: result is the output operand (%0),
     * a and b are the input register operands (%1, %2). */
    __asm(".insn r 0x7F, 0x2, 0x79,   %0,%1,%2"
          : "=r"(result)
          : "r"(a), "r"(b));
    return result;
}

#pragma inline=forced_no_body
unsigned long __RV_RSTSA32(unsigned long a, unsigned long b) {
    unsigned long result;

    /* Hand-encoded R-type .insn: result is the output operand (%0),
     * a and b are the input register operands (%1, %2). */
    __asm(".insn r 0x7F, 0x2, 0x59,   %0,%1,%2"
          : "=r"(result)
          : "r"(a), "r"(b));
    return result;
}

#pragma inline=forced_no_body
unsigned long __RV_KSTSA32(unsigned long a, unsigned long b) {
    unsigned long result;

    /* Hand-encoded R-type .insn: result is the output operand (%0),
     * a and b are the input register operands (%1, %2). */
    __asm(".insn r 0x7F, 0x2, 0x61,   %0,%1,%2"
          : "=r"(result)
          : "r"(a), "r"(b));
    return result;
}

#pragma inline=forced_no_body
unsigned long __RV_URSTSA32(unsigned long a, unsigned long b) {
    unsigned long result;

    /* Hand-encoded R-type .insn: result is the output operand (%0),
     * a and b are the input register operands (%1, %2). */
    __asm(".insn r 0x7F, 0x2, 0x69,   %0,%1,%2"
          : "=r"(result)
          : "r"(a), "r"(b));
    return result;
}

#pragma inline=forced_no_body
unsigned long __RV_UKSTSA32(unsigned long a, unsigned long b) {
    unsigned long result;

    /* Hand-encoded R-type .insn: result is the output operand (%0),
     * a and b are the input register operands (%1, %2). */
    __asm(".insn r 0x7F, 0x2, 0x71,   %0,%1,%2"
          : "=r"(result)
          : "r"(a), "r"(b));
    return result;
}

// __RV_EXPD80: picks byte 0 (bits [7:0]) of a and passes it to __EXPD_BYTE.
#pragma inline=forced_no_body
unsigned long __RV_EXPD80(unsigned long a)
{
    const uint8_t byte0 = (uint8_t)(a & 0xff);
    return __EXPD_BYTE(byte0);
}

// __RV_EXPD81: picks byte 1 (bits [15:8]) of a and passes it to __EXPD_BYTE.
#pragma inline=forced_no_body
unsigned long __RV_EXPD81(unsigned long a)
{
    const uint8_t byte1 = (uint8_t)((a >> 8) & 0xff);
    return __EXPD_BYTE(byte1);
}

// __RV_EXPD82: picks byte 2 (bits [23:16]) of a and passes it to __EXPD_BYTE.
#pragma inline=forced_no_body
unsigned long __RV_EXPD82(unsigned long a)
{
    const uint8_t byte2 = (uint8_t)((a >> 16) & 0xff);
    return __EXPD_BYTE(byte2);
}

// __RV_EXPD83: picks byte 3 (bits [31:24]) of a and passes it to __EXPD_BYTE.
#pragma inline=forced_no_body
unsigned long __RV_EXPD83(unsigned long a)
{
    const uint8_t byte3 = (uint8_t)((a >> 24) & 0xff);
    return __EXPD_BYTE(byte3);
}

#if __RISCV_XLEN == 64
// RV64 only
// __RV_EXPD84: picks byte 4 (bits [39:32]) of a and passes it to __EXPD_BYTE.
#pragma inline=forced_no_body
unsigned long __RV_EXPD84(unsigned long a)
{
    const uint8_t byte4 = (uint8_t)((a >> 32) & 0xff);
    return __EXPD_BYTE(byte4);
}

// __RV_EXPD85: picks byte 5 (bits [47:40]) of a and passes it to __EXPD_BYTE.
#pragma inline=forced_no_body
unsigned long __RV_EXPD85(unsigned long a)
{
    const uint8_t byte5 = (uint8_t)((a >> 40) & 0xff);
    return __EXPD_BYTE(byte5);
}

// __RV_EXPD86: picks byte 6 (bits [55:48]) of a and passes it to __EXPD_BYTE.
#pragma inline=forced_no_body
unsigned long __RV_EXPD86(unsigned long a)
{
    const uint8_t byte6 = (uint8_t)((a >> 48) & 0xff);
    return __EXPD_BYTE(byte6);
}

// __RV_EXPD87: picks byte 7 (bits [63:56]) of a and passes it to __EXPD_BYTE.
#pragma inline=forced_no_body
unsigned long __RV_EXPD87(unsigned long a)
{
    const uint8_t byte7 = (uint8_t)((a >> 56) & 0xff);
    return __EXPD_BYTE(byte7);
}
#endif
#pragma language=restore

#else
    #error Unknown compiler
#endif /* __ICCRISCV__ */


/* XXXXX ARM Compatible SIMD API XXXXX */
/** \brief Quad 8-bit saturating addition (Q setting); forwards to __RV_KADD8. */
#define __QADD8(a, b)               __RV_KADD8((a), (b))
/** \brief Quad 8-bit saturating subtraction (Q setting); forwards to __RV_KSUB8. */
#define __QSUB8(a, b)               __RV_KSUB8((a), (b))
/** \brief Dual 16-bit saturating addition (Q setting); forwards to __RV_KADD16. */
#define __QADD16(a, b)              __RV_KADD16((a), (b))
/** \brief Dual 16-bit signed addition, results halved; forwards to __RV_RADD16. */
#define __SHADD16(a, b)             __RV_RADD16((a), (b))
/** \brief Dual 16-bit saturating subtraction (Q setting); forwards to __RV_KSUB16. */
#define __QSUB16(a, b)              __RV_KSUB16((a), (b))
/** \brief Dual 16-bit signed subtraction, results halved; forwards to __RV_RSUB16. */
#define __SHSUB16(a, b)             __RV_RSUB16((a), (b))
/** \brief Dual 16-bit add and subtract with exchange (Q setting); forwards to __RV_KCRAS16. */
#define __QASX(a, b)                __RV_KCRAS16((a), (b))
/** \brief Dual 16-bit signed add and subtract, results halved; forwards to __RV_RCRAS16. */
#define __SHASX(a, b)               __RV_RCRAS16((a), (b))
/** \brief Dual 16-bit subtract and add with exchange (Q setting); forwards to __RV_KCRSA16. */
#define __QSAX(a, b)                __RV_KCRSA16((a), (b))
/** \brief Dual 16-bit signed subtract and add, results halved; forwards to __RV_RCRSA16. */
#define __SHSAX(a, b)               __RV_RCRSA16((a), (b))
/** \brief Dual 16-bit signed multiply with exchange returning the difference.
 *  The two operands are handed to __RV_SMXDS in swapped order. */
#define __SMUSDX(a, b)              __RV_SMXDS((b), (a))
/** \brief Sum of dual 16-bit signed multiplies with exchange (Q setting); forwards to __RV_KMXDA. */
__STATIC_FORCEINLINE long __SMUADX(unsigned long op1, unsigned long op2)
{
    long sum = __RV_KMXDA(op1, op2);
    return sum;
}
/** \brief Saturating add (Q setting); forwards to __RV_KADDW. */
#define __QADD(a, b)                __RV_KADDW((a), (b))
/** \brief Saturating subtract (Q setting); forwards to __RV_KSUBW. */
#define __QSUB(a, b)                __RV_KSUBW((a), (b))
/** \brief Dual 16-bit signed multiply with a single 32-bit accumulator (Q setting).
 *  The accumulator is passed as the first argument of __RV_KMADA. */
__STATIC_FORCEINLINE long __SMLAD(unsigned long op1, unsigned long op2, long acc)
{
    long result = __RV_KMADA(acc, op1, op2);
    return result;
}
/** \brief Pre-exchanged dual 16-bit signed multiply with a single 32-bit accumulator (Q setting).
 *  The accumulator is passed as the first argument of __RV_KMAXDA. */
__STATIC_FORCEINLINE long __SMLADX(unsigned long op1, unsigned long op2, long acc)
{
    long result = __RV_KMAXDA(acc, op1, op2);
    return result;
}
/** \brief Dual 16-bit signed multiply with exchange, subtract, and 32-bit accumulate (Q setting).
 *  Computed as acc minus the value returned by __RV_SMXDS(op1, op2). */
__STATIC_FORCEINLINE long __SMLSDX(unsigned long op1, unsigned long op2, long acc)
{
    const long result = acc - __RV_SMXDS(op1, op2);
    return result;
}
/** \brief Dual 16-bit signed multiply with a single 64-bit accumulator.
 *  The accumulator is passed as the first argument of __RV_SMALDA. */
__STATIC_FORCEINLINE long long __SMLALD(unsigned long op1, unsigned long op2, long long acc)
{
    long long result = __RV_SMALDA(acc, op1, op2);
    return result;
}
/** \brief Dual 16-bit signed multiply with exchange and a single 64-bit accumulator.
 *  The accumulator is passed as the first argument of __RV_SMALXDA. */
__STATIC_FORCEINLINE long long __SMLALDX(unsigned long op1, unsigned long op2, long long acc)
{
    long long result = __RV_SMALXDA(acc, op1, op2);
    return result;
}
/** \brief Sum of dual 16-bit signed multiplies (Q setting); forwards to __RV_KMDA. */
__STATIC_FORCEINLINE long __SMUAD(unsigned long op1, unsigned long op2)
{
    long sum = __RV_KMDA(op1, op2);
    return sum;
}
/** \brief Dual 16-bit signed multiply returning the difference; forwards to __RV_SMDRS. */
__STATIC_FORCEINLINE long __SMUSD(unsigned long op1, unsigned long op2)
{
    long diff = __RV_SMDRS(op1, op2);
    return diff;
}
/** \brief Extract two 8-bit values and sign-extend each to 16 bits; forwards to __RV_SUNPKD820. */
#define __SXTB16(val)           __RV_SUNPKD820(val)
/** \brief Dual extracted 8-bit to 16-bit signed addition. TODO Need test
 *  op2 is first unpacked with __RV_SUNPKD820, then added to op1 with __RV_ADD16. */
__STATIC_FORCEINLINE unsigned long __SXTAB16(unsigned long op1, unsigned long op2)
{
    unsigned long unpacked = __RV_SUNPKD820(op2);
    return __RV_ADD16(op1, unpacked);
}
/** \brief __SXTAB16 applied to OP1 and to OP2 after OP2 has been rotated by ROT with __ROR. */
#define __SXTAB16_RORn(OP1, OP2, ROT)             __SXTAB16(OP1, __ROR(OP2, ROT))

/** \brief 32-bit signed multiply with 32-bit truncated accumulator.
 *  Adds the value returned by __RV_SMMUL(op1, op2) to acc. */
__STATIC_FORCEINLINE long __SMMLA(long op1, long op2, long acc)
{
    return acc + (long)__RV_SMMUL(op1, op2);
}
/*
 * Short aliases: each name below expands directly to the __RV_-prefixed
 * intrinsic on its right-hand side. Being object-like macros, they take
 * whatever arguments the target intrinsic takes, in the same order.
 */
#define __DKHM8                 __RV_DKHM8
#define __DKHM16                __RV_DKHM16
#define __DKSUB16               __RV_DKSUB16
#define __SMAQA                 __RV_SMAQA
#define __MULSR64               __RV_MULSR64
/* Note the Q-to-K renaming: __DQADD8/__DQSUB8 map to the DK-prefixed intrinsics. */
#define __DQADD8                __RV_DKADD8
#define __DQSUB8                __RV_DKSUB8
#define __DKADD16               __RV_DKADD16
#define __PKBB16                __RV_PKBB16
#define __DKSLRA16              __RV_DKSLRA16
#define __DKSLRA8               __RV_DKSLRA8
#define __KABSW                 __RV_KABSW
#define __DKABS8                __RV_DKABS8
#define __DKABS16               __RV_DKABS16
#define __SMALDA                __RV_SMALDA
#define __SMSLDA                __RV_SMSLDA
#define __SMALBB                __RV_SMALBB
#define __SUB64                 __RV_SUB64
#define __ADD64                 __RV_ADD64
#define __SMBB16                __RV_SMBB16
#define __SMBT16                __RV_SMBT16
#define __SMTT16                __RV_SMTT16
#define __EXPD80                __RV_EXPD80
#define __SMAX8                 __RV_SMAX8
#define __SMAX16                __RV_SMAX16
#define __PKTT16                __RV_PKTT16
#define __KADD16                __RV_KADD16
/* NOTE(review): __SADD16 aliases the plain ADD16 intrinsic, whereas __SSUB8 and
 * __SADD8 alias the K-prefixed intrinsics that this file also uses behind the
 * "saturating" __QSUB8/__QADD8 macros -- confirm the asymmetry is intended. */
#define __SADD16                __RV_ADD16
#define __SSUB8                 __RV_KSUB8
#define __SADD8                 __RV_KADD8
#define __USAT16                __RV_UCLIP16
#define __SMALTT                __RV_SMALTT

/** \brief Halfword packing instruction. Combines bits[15:0] of ARG1 with bits[31:16] of ARG2 left-shifted by ARG3.
 *  ARG3 == 0 is routed to __RV_PKTB16 and ARG3 == 16 to __RV_PKBB16; any other shift uses
 *  plain 32-bit mask-and-shift arithmetic.
 *  \note Every argument is parenthesized so that compound expressions (e.g. `c ? 16 : 8`) select the
 *        right branch. Arguments are expanded more than once and must not have side effects. */
#define __PKHBT(ARG1, ARG2, ARG3)  (((ARG3) == 0) ? __RV_PKTB16((ARG2), (ARG1)) :          \
                                   ((ARG3) == 16) ? __RV_PKBB16((ARG2), (ARG1)) :          \
                                   (((((uint32_t)(ARG1))          ) & 0x0000FFFFUL) |    \
                                   ((((uint32_t)(ARG2)) << (ARG3)) & 0xFFFF0000UL)))

/** \brief Halfword packing instruction. Combines bits[31:16] of ARG1 with bits[15:0] of ARG2 right-shifted by ARG3.
 *  ARG3 == 0 is routed to __RV_PKTB16 and ARG3 == 16 to __RV_PKTT16; any other shift uses
 *  plain 32-bit mask-and-shift arithmetic (the shift is performed on a uint32_t value).
 *  \note Every argument is parenthesized so that compound expressions (e.g. `c ? 16 : 8`) select the
 *        right branch. Arguments are expanded more than once and must not have side effects. */
#define __PKHTB(ARG1, ARG2, ARG3)  (((ARG3) == 0) ? __RV_PKTB16((ARG1), (ARG2)) :          \
                                   ((ARG3) == 16) ? __RV_PKTT16((ARG1), (ARG2)) :          \
                                   (((((uint32_t)(ARG1))          ) & 0xFFFF0000UL) |    \
                                   ((((uint32_t)(ARG2)) >> (ARG3)) & 0x0000FFFFUL)))

#if __RISCV_XLEN == 64
/** \brief Halfword packing instruction on both 32-bit halves of a 64-bit value.
 *  Low half: combines bits[15:0] of ARG1 with bits[31:16] of ARG2 left-shifted by ARG3.
 *  High half: combines bits[47:32] of ARG1 with bits[63:48] of the upper word of ARG2 left-shifted by ARG3.
 *  The two 32-bit results are then packed into one 64-bit value.
 *  ARG3 == 0 is routed to __RV_PKTB16 and ARG3 == 16 to __RV_PKBB16.
 *  \note Arguments are fully parenthesized and the packing is done in uint64_t so the `<< 32` never
 *        shifts into a sign bit. Arguments are expanded more than once and must not have side effects. */
#define __PKHBT64(ARG1, ARG2, ARG3)  (((ARG3) == 0) ? __RV_PKTB16((ARG2), (ARG1)) :           \
                                   ((ARG3) == 16) ? __RV_PKBB16((ARG2), (ARG1)) :            \
                                   ((uint64_t)((((uint32_t)((uint64_t)(ARG1) >> 32)) & 0x0000FFFFUL) |           \
                                   ((((uint32_t)((uint64_t)(ARG2) >> 32)) << (ARG3)) & 0xFFFF0000UL)) << 32) |  \
                                   ((uint64_t)(((((uint32_t)(ARG1))) & 0x0000FFFFUL) |                          \
                                   ((((uint32_t)(ARG2)) << (ARG3)) & 0xFFFF0000UL)) & 0xFFFFFFFFUL))

/** \brief Halfword packing instruction on both 32-bit halves of a 64-bit value.
 *  Low half: combines bits[31:16] of ARG1 with bits[15:0] of ARG2 right-shifted by ARG3.
 *  High half: combines bits[63:48] of ARG1 with bits[47:32] of the upper word of ARG2 right-shifted by ARG3.
 *  The two 32-bit results are then packed into one 64-bit value.
 *  ARG3 == 0 is routed to __RV_PKTB16 and ARG3 == 16 to __RV_PKTT16.
 *  \note Arguments are fully parenthesized so compound expressions expand correctly.
 *        Arguments are expanded more than once and must not have side effects. */
#define __PKHTB64(ARG1, ARG2, ARG3)  (((ARG3) == 0) ? __RV_PKTB16((ARG1), (ARG2)) :           \
                                   ((ARG3) == 16) ? __RV_PKTT16((ARG1), (ARG2)) :            \
                                   ((uint64_t)(((uint32_t)((uint64_t)(ARG1) >> 32) & 0xFFFF0000UL) |            \
                                   ((((uint32_t)((uint64_t)(ARG2) >> 32)) >> (ARG3)) & 0x0000FFFFUL)) << 32) |  \
                                   ((uint64_t)(((uint32_t)(ARG1) & 0xFFFF0000UL) |                              \
                                   ((((uint32_t)(ARG2)) >> (ARG3)) & 0x0000FFFFUL)) & 0xFFFFFFFFUL))
#else
/** \brief Halfword packing instruction on both 32-bit halves of a 64-bit value.
 *  Low half: combines bits[15:0] of ARG1 with bits[31:16] of ARG2 left-shifted by ARG3.
 *  High half: combines bits[47:32] of ARG1 with bits[63:48] of the upper word of ARG2 left-shifted by ARG3.
 *  The two 32-bit results are then packed into one 64-bit value.
 *  ARG3 == 0 is routed to __RV_DPKTB16 and ARG3 == 16 to __RV_DPKBB16.
 *  \note Arguments are fully parenthesized and the packing is done in uint64_t so the `<< 32` never
 *        shifts into a sign bit. Arguments are expanded more than once and must not have side effects. */
#define __PKHBT64(ARG1, ARG2, ARG3)  (((ARG3) == 0) ? __RV_DPKTB16((ARG2), (ARG1)) :           \
                                   ((ARG3) == 16) ? __RV_DPKBB16((ARG2), (ARG1)) :            \
                                   ((uint64_t)((((uint32_t)((uint64_t)(ARG1) >> 32)) & 0x0000FFFFUL) |           \
                                   ((((uint32_t)((uint64_t)(ARG2) >> 32)) << (ARG3)) & 0xFFFF0000UL)) << 32) |  \
                                   ((uint64_t)(((((uint32_t)(ARG1))) & 0x0000FFFFUL) |                          \
                                   ((((uint32_t)(ARG2)) << (ARG3)) & 0xFFFF0000UL)) & 0xFFFFFFFFUL))

/** \brief Halfword packing instruction on both 32-bit halves of a 64-bit value.
 *  Low half: combines bits[31:16] of ARG1 with bits[15:0] of ARG2 right-shifted by ARG3.
 *  High half: combines bits[63:48] of ARG1 with bits[47:32] of the upper word of ARG2 right-shifted by ARG3.
 *  The two 32-bit results are then packed into one 64-bit value.
 *  ARG3 == 0 is routed to __RV_DPKTB16 and ARG3 == 16 to __RV_DPKTT16.
 *  \note Arguments are fully parenthesized so compound expressions expand correctly.
 *        Arguments are expanded more than once and must not have side effects. */
#define __PKHTB64(ARG1, ARG2, ARG3)  (((ARG3) == 0) ? __RV_DPKTB16((ARG1), (ARG2)) :           \
                                   ((ARG3) == 16) ? __RV_DPKTT16((ARG1), (ARG2)) :            \
                                   ((uint64_t)(((uint32_t)((uint64_t)(ARG1) >> 32) & 0xFFFF0000UL) |            \
                                   ((((uint32_t)((uint64_t)(ARG2) >> 32)) >> (ARG3)) & 0x0000FFFFUL)) << 32) |  \
                                   ((uint64_t)(((uint32_t)(ARG1) & 0xFFFF0000UL) |                              \
                                   ((((uint32_t)(ARG2)) >> (ARG3)) & 0x0000FFFFUL)) & 0xFFFFFFFFUL))
#endif /* __RISCV_XLEN == 64 */

/** \brief Rotate VAL by ROT with __ROR, then unpack the result with __RV_SUNPKD820.
 *  Kept as a separate macro for compatibility with ARM toolchains, which can rotate and extract in one instruction. */
#define __SXTB16_RORn(VAL, ROT)     __RV_SUNPKD820(__ROR(VAL, ROT))

#endif /* defined(__DSP_PRESENT) && (__DSP_PRESENT == 1) */

#ifdef __cplusplus
}
#endif

#endif /* __CORE_FEATURE_DSP__ */