core_feature_dsp.h 855 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
79278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728972997309731973297339734973597369737973897399740974197429743974497459746974797489749975097519752975397549755975697579758975997609761976297639764976597669767976897699770977197729773977497759776977
79778977997809781978297839784978597869787978897899790979197929793979497959796979797989799980098019802980398049805980698079808980998109811981298139814981598169817981898199820982198229823982498259826982798289829983098319832983398349835983698379838983998409841984298439844984598469847984898499850985198529853985498559856985798589859986098619862986398649865986698679868986998709871987298739874987598769877987898799880988198829883988498859886988798889889989098919892989398949895989698979898989999009901990299039904990599069907990899099910991199129913991499159916991799189919992099219922992399249925992699279928992999309931993299339934993599369937993899399940994199429943994499459946994799489949995099519952995399549955995699579958995999609961996299639964996599669967996899699970997199729973997499759976997799789979998099819982998399849985998699879988998999909991999299939994999599969997999899991000010001100021000310004100051000610007100081000910010100111001210013100141001510016100171001810019100201002110022100231002410025100261002710028100291003010031100321003310034100351003610037100381003910040100411004210043100441004510046100471004810049100501005110052100531005410055100561005710058100591006010061100621006310064100651006610067100681006910070100711007210073100741007510076100771007810079100801008110082100831008410085100861008710088100891009010091100921009310094100951009610097100981009910100101011010210103101041010510106101071010810109101101011110112101131011410115101161011710118101191012010121101221012310124101251012610127101281012910130101311013210133101341013510136101371013810139101401014110142101431014410145101461014710148101491015010151101521015310154101551015610157101581015910160101611016210163101641016510166101671016810169101701017110172101731017410175101761017710178101791018010181101821018310184101851018610187101881018910190101911019210193101941019510196101971019810199102001020110202102031020410205102061020710208102091021010211102121021310214102151021610217102181021910220102211
02221022310224102251022610227102281022910230102311023210233102341023510236102371023810239102401024110242102431024410245102461024710248102491025010251102521025310254102551025610257102581025910260102611026210263102641026510266102671026810269102701027110272102731027410275102761027710278102791028010281102821028310284102851028610287102881028910290102911029210293102941029510296102971029810299103001030110302103031030410305103061030710308103091031010311103121031310314103151031610317103181031910320103211032210323103241032510326103271032810329103301033110332103331033410335103361033710338103391034010341103421034310344103451034610347103481034910350103511035210353103541035510356103571035810359103601036110362103631036410365103661036710368103691037010371103721037310374103751037610377103781037910380103811038210383103841038510386103871038810389103901039110392103931039410395103961039710398103991040010401104021040310404104051040610407104081040910410104111041210413104141041510416104171041810419104201042110422104231042410425104261042710428104291043010431104321043310434104351043610437104381043910440104411044210443104441044510446104471044810449104501045110452104531045410455104561045710458104591046010461104621046310464104651046610467104681046910470104711047210473104741047510476104771047810479104801048110482104831048410485104861048710488104891049010491104921049310494104951049610497104981049910500105011050210503105041050510506105071050810509105101051110512105131051410515105161051710518105191052010521105221052310524105251052610527105281052910530105311053210533105341053510536105371053810539105401054110542105431054410545105461054710548105491055010551105521055310554105551055610557105581055910560105611056210563105641056510566105671056810569105701057110572105731057410575105761057710578105791058010581105821058310584105851058610587105881058910590105911059210593105941059510596105971059810599106001060110602106031060410605106061060710608106091061010611106121061310614106151061610617106181061910620106211
06221062310624106251062610627106281062910630106311063210633106341063510636106371063810639106401064110642106431064410645106461064710648106491065010651106521065310654106551065610657106581065910660106611066210663106641066510666106671066810669106701067110672106731067410675106761067710678106791068010681106821068310684106851068610687106881068910690106911069210693106941069510696106971069810699107001070110702107031070410705107061070710708107091071010711107121071310714107151071610717107181071910720107211072210723107241072510726107271072810729107301073110732107331073410735107361073710738107391074010741107421074310744107451074610747107481074910750107511075210753107541075510756107571075810759107601076110762107631076410765107661076710768107691077010771107721077310774107751077610777107781077910780107811078210783107841078510786107871078810789107901079110792107931079410795107961079710798107991080010801108021080310804108051080610807108081080910810108111081210813108141081510816108171081810819108201082110822108231082410825108261082710828108291083010831108321083310834108351083610837108381083910840108411084210843108441084510846108471084810849108501085110852108531085410855108561085710858108591086010861108621086310864108651086610867108681086910870108711087210873108741087510876108771087810879108801088110882108831088410885108861088710888108891089010891108921089310894108951089610897108981089910900109011090210903109041090510906109071090810909109101091110912109131091410915109161091710918109191092010921109221092310924109251092610927109281092910930109311093210933109341093510936109371093810939109401094110942109431094410945109461094710948109491095010951109521095310954109551095610957109581095910960109611096210963109641096510966109671096810969109701097110972109731097410975109761097710978109791098010981109821098310984109851098610987109881098910990109911099210993109941099510996109971099810999110001100111002110031100411005110061100711008110091101011011110121101311014110151101611017110181101911020110211
10221102311024110251102611027110281102911030110311103211033110341103511036110371103811039110401104111042110431104411045110461104711048110491105011051110521105311054110551105611057110581105911060110611106211063110641106511066110671106811069110701107111072110731107411075110761107711078110791108011081110821108311084110851108611087110881108911090110911109211093110941109511096110971109811099111001110111102111031110411105111061110711108111091111011111111121111311114111151111611117111181111911120111211112211123111241112511126111271112811129111301113111132111331113411135111361113711138111391114011141111421114311144111451114611147111481114911150111511115211153111541115511156111571115811159111601116111162111631116411165111661116711168111691117011171111721117311174111751117611177111781117911180111811118211183111841118511186111871118811189111901119111192111931119411195111961119711198111991120011201112021120311204112051120611207112081120911210112111121211213112141121511216112171121811219112201122111222112231122411225112261122711228112291123011231112321123311234112351123611237112381123911240112411124211243112441124511246112471124811249112501125111252112531125411255112561125711258112591126011261112621126311264112651126611267112681126911270112711127211273112741127511276112771127811279112801128111282112831128411285112861128711288112891129011291112921129311294112951129611297112981129911300113011130211303113041130511306113071130811309113101131111312113131131411315113161131711318113191132011321113221132311324113251132611327113281132911330113311133211333113341133511336113371133811339113401134111342113431134411345113461134711348113491135011351113521135311354113551135611357113581135911360113611136211363113641136511366113671136811369113701137111372113731137411375113761137711378113791138011381113821138311384113851138611387113881138911390113911139211393113941139511396113971139811399114001140111402114031140411405114061140711408114091141011411114121141311414114151141611417114181141911420114211
14221142311424114251142611427114281142911430114311143211433114341143511436114371143811439114401144111442114431144411445114461144711448114491145011451114521145311454114551145611457114581145911460114611146211463114641146511466114671146811469114701147111472114731147411475114761147711478114791148011481114821148311484114851148611487114881148911490114911149211493114941149511496114971149811499115001150111502115031150411505115061150711508115091151011511115121151311514115151151611517115181151911520115211152211523115241152511526115271152811529115301153111532115331153411535115361153711538115391154011541115421154311544115451154611547115481154911550115511155211553115541155511556115571155811559115601156111562115631156411565115661156711568115691157011571115721157311574115751157611577115781157911580115811158211583115841158511586115871158811589115901159111592115931159411595115961159711598115991160011601116021160311604116051160611607116081160911610116111161211613116141161511616116171161811619116201162111622116231162411625116261162711628116291163011631116321163311634116351163611637116381163911640116411164211643116441164511646116471164811649116501165111652116531165411655116561165711658116591166011661116621166311664116651166611667116681166911670116711167211673116741167511676116771167811679116801168111682116831168411685116861168711688116891169011691116921169311694116951169611697116981169911700117011170211703117041170511706117071170811709117101171111712117131171411715117161171711718117191172011721117221172311724117251172611727117281172911730117311173211733117341173511736117371173811739117401174111742117431174411745117461174711748117491175011751117521175311754117551175611757117581175911760117611176211763117641176511766117671176811769117701177111772117731177411775117761177711778117791178011781117821178311784117851178611787117881178911790117911179211793117941179511796117971179811799118001180111802118031180411805118061180711808118091181011811118121181311814118151181611817118181181911820118211
18221182311824118251182611827118281182911830118311183211833118341183511836118371183811839118401184111842118431184411845118461184711848118491185011851118521185311854118551185611857118581185911860118611186211863118641186511866118671186811869118701187111872118731187411875118761187711878118791188011881118821188311884118851188611887118881188911890118911189211893118941189511896118971189811899119001190111902119031190411905119061190711908119091191011911119121191311914119151191611917119181191911920119211192211923119241192511926119271192811929119301193111932119331193411935119361193711938119391194011941119421194311944119451194611947119481194911950119511195211953119541195511956119571195811959119601196111962119631196411965119661196711968119691197011971119721197311974119751197611977119781197911980119811198211983119841198511986119871198811989119901199111992119931199411995119961199711998119991200012001120021200312004120051200612007120081200912010120111201212013120141201512016120171201812019120201202112022120231202412025120261202712028120291203012031120321203312034120351203612037120381203912040120411204212043120441204512046120471204812049120501205112052120531205412055120561205712058120591206012061120621206312064120651206612067120681206912070120711207212073120741207512076120771207812079120801208112082120831208412085120861208712088120891209012091120921209312094120951209612097120981209912100121011210212103121041210512106121071210812109121101211112112121131211412115121161211712118121191212012121121221212312124121251212612127121281212912130121311213212133121341213512136121371213812139121401214112142121431214412145121461214712148121491215012151121521215312154121551215612157121581215912160121611216212163121641216512166121671216812169121701217112172121731217412175121761217712178121791218012181121821218312184121851218612187121881218912190121911219212193121941219512196121971219812199122001220112202122031220412205122061220712208122091221012211122121221312214122151221612217122181221912220122211
22221222312224122251222612227122281222912230122311223212233122341223512236122371223812239122401224112242122431224412245122461224712248122491225012251122521225312254122551225612257122581225912260122611226212263122641226512266122671226812269122701227112272122731227412275122761227712278122791228012281122821228312284122851228612287122881228912290122911229212293122941229512296122971229812299123001230112302123031230412305123061230712308123091231012311123121231312314123151231612317123181231912320123211232212323123241232512326123271232812329123301233112332123331233412335123361233712338123391234012341123421234312344123451234612347123481234912350123511235212353123541235512356123571235812359123601236112362123631236412365123661236712368123691237012371123721237312374123751237612377123781237912380123811238212383123841238512386123871238812389123901239112392123931239412395123961239712398123991240012401124021240312404124051240612407124081240912410124111241212413124141241512416124171241812419124201242112422124231242412425124261242712428124291243012431124321243312434124351243612437124381243912440124411244212443124441244512446124471244812449124501245112452124531245412455124561245712458124591246012461124621246312464124651246612467124681246912470124711247212473124741247512476124771247812479124801248112482124831248412485124861248712488124891249012491124921249312494124951249612497124981249912500125011250212503125041250512506125071250812509125101251112512125131251412515125161251712518125191252012521125221252312524125251252612527125281252912530125311253212533125341253512536125371253812539125401254112542125431254412545125461254712548125491255012551125521255312554125551255612557125581255912560125611256212563125641256512566125671256812569125701257112572125731257412575125761257712578125791258012581125821258312584125851258612587125881258912590125911259212593125941259512596125971259812599126001260112602126031260412605126061260712608126091261012611126121261312614126151261612617126181261912620126211
26221262312624126251262612627126281262912630126311263212633126341263512636126371263812639126401264112642126431264412645126461264712648126491265012651126521265312654126551265612657126581265912660126611266212663126641266512666126671266812669126701267112672126731267412675126761267712678126791268012681126821268312684126851268612687126881268912690126911269212693126941269512696126971269812699127001270112702127031270412705127061270712708127091271012711127121271312714127151271612717127181271912720127211272212723127241272512726127271272812729127301273112732127331273412735127361273712738127391274012741127421274312744127451274612747127481274912750127511275212753127541275512756127571275812759127601276112762127631276412765127661276712768127691277012771127721277312774127751277612777127781277912780127811278212783127841278512786127871278812789127901279112792127931279412795127961279712798127991280012801128021280312804128051280612807128081280912810128111281212813128141281512816128171281812819128201282112822128231282412825128261282712828128291283012831128321283312834128351283612837128381283912840128411284212843128441284512846128471284812849128501285112852128531285412855128561285712858128591286012861128621286312864128651286612867128681286912870128711287212873128741287512876128771287812879128801288112882128831288412885128861288712888128891289012891128921289312894128951289612897128981289912900129011290212903129041290512906129071290812909129101291112912129131291412915129161291712918129191292012921129221292312924129251292612927129281292912930129311293212933129341293512936129371293812939129401294112942129431294412945129461294712948129491295012951129521295312954129551295612957129581295912960129611296212963129641296512966129671296812969129701297112972129731297412975129761297712978129791298012981129821298312984129851298612987129881298912990129911299212993129941299512996129971299812999130001300113002130031300413005130061300713008130091301013011130121301313014130151301613017130181301913020130211
30221302313024130251302613027130281302913030130311303213033130341303513036130371303813039130401304113042130431304413045130461304713048130491305013051130521305313054130551305613057130581305913060130611306213063130641306513066130671306813069130701307113072130731307413075130761307713078130791308013081130821308313084130851308613087130881308913090130911309213093130941309513096130971309813099131001310113102131031310413105131061310713108131091311013111131121311313114131151311613117131181311913120131211312213123131241312513126131271312813129131301313113132131331313413135131361313713138131391314013141131421314313144131451314613147131481314913150131511315213153131541315513156131571315813159131601316113162131631316413165131661316713168131691317013171131721317313174131751317613177131781317913180131811318213183131841318513186131871318813189131901319113192131931319413195131961319713198131991320013201132021320313204132051320613207132081320913210132111321213213132141321513216132171321813219132201322113222132231322413225132261322713228132291323013231132321323313234132351323613237132381323913240132411324213243132441324513246132471324813249132501325113252132531325413255132561325713258132591326013261132621326313264132651326613267132681326913270132711327213273132741327513276132771327813279132801328113282132831328413285132861328713288132891329013291132921329313294132951329613297132981329913300133011330213303133041330513306133071330813309133101331113312133131331413315133161331713318133191332013321133221332313324133251332613327133281332913330133311333213333133341333513336133371333813339133401334113342133431334413345133461334713348133491335013351133521335313354133551335613357133581335913360133611336213363133641336513366133671336813369133701337113372133731337413375133761337713378133791338013381133821338313384133851338613387133881338913390133911339213393133941339513396133971339813399134001340113402134031340413405134061340713408134091341013411134121341313414134151341613417134181341913420134211
34221342313424134251342613427134281342913430134311343213433134341343513436134371343813439134401344113442134431344413445134461344713448134491345013451134521345313454134551345613457134581345913460134611346213463134641346513466134671346813469134701347113472134731347413475134761347713478134791348013481134821348313484134851348613487134881348913490134911349213493134941349513496134971349813499135001350113502135031350413505135061350713508135091351013511135121351313514135151351613517135181351913520135211352213523135241352513526135271352813529135301353113532135331353413535135361353713538135391354013541135421354313544135451354613547135481354913550135511355213553135541355513556135571355813559135601356113562135631356413565135661356713568135691357013571135721357313574135751357613577135781357913580135811358213583135841358513586135871358813589135901359113592135931359413595135961359713598135991360013601136021360313604136051360613607136081360913610136111361213613136141361513616136171361813619136201362113622136231362413625136261362713628136291363013631136321363313634136351363613637136381363913640136411364213643136441364513646136471364813649136501365113652136531365413655136561365713658136591366013661136621366313664136651366613667136681366913670136711367213673136741367513676136771367813679136801368113682136831368413685136861368713688136891369013691136921369313694136951369613697136981369913700137011370213703137041370513706137071370813709137101371113712137131371413715137161371713718137191372013721137221372313724137251372613727137281372913730137311373213733137341373513736137371373813739137401374113742137431374413745137461374713748137491375013751137521375313754137551375613757137581375913760137611376213763137641376513766137671376813769137701377113772137731377413775137761377713778137791378013781137821378313784137851378613787137881378913790137911379213793137941379513796137971379813799138001380113802138031380413805138061380713808138091381013811138121381313814138151381613817138181381913820138211
38221382313824138251382613827138281382913830138311383213833138341383513836138371383813839138401384113842138431384413845138461384713848138491385013851138521385313854138551385613857138581385913860138611386213863138641386513866138671386813869138701387113872138731387413875138761387713878138791388013881138821388313884138851388613887138881388913890138911389213893138941389513896138971389813899139001390113902139031390413905139061390713908139091391013911139121391313914139151391613917139181391913920139211392213923139241392513926139271392813929139301393113932139331393413935139361393713938139391394013941139421394313944139451394613947139481394913950139511395213953139541395513956139571395813959139601396113962139631396413965139661396713968139691397013971139721397313974139751397613977139781397913980139811398213983139841398513986139871398813989139901399113992139931399413995139961399713998139991400014001140021400314004140051400614007140081400914010140111401214013140141401514016140171401814019140201402114022140231402414025140261402714028140291403014031140321403314034140351403614037140381403914040140411404214043140441404514046140471404814049140501405114052140531405414055140561405714058140591406014061140621406314064140651406614067140681406914070140711407214073140741407514076140771407814079140801408114082140831408414085140861408714088140891409014091140921409314094140951409614097140981409914100141011410214103141041410514106141071410814109141101411114112141131411414115141161411714118141191412014121141221412314124141251412614127141281412914130141311413214133141341413514136141371413814139141401414114142141431414414145141461414714148141491415014151141521415314154141551415614157141581415914160141611416214163141641416514166141671416814169141701417114172141731417414175141761417714178141791418014181141821418314184141851418614187141881418914190141911419214193141941419514196141971419814199142001420114202142031420414205142061420714208142091421014211142121421314214142151421614217142181421914220142211
42221422314224142251422614227142281422914230142311423214233142341423514236142371423814239142401424114242142431424414245142461424714248142491425014251142521425314254142551425614257142581425914260142611426214263142641426514266142671426814269142701427114272142731427414275142761427714278142791428014281142821428314284142851428614287142881428914290142911429214293142941429514296142971429814299143001430114302143031430414305143061430714308143091431014311143121431314314143151431614317143181431914320143211432214323143241432514326143271432814329143301433114332143331433414335143361433714338143391434014341143421434314344143451434614347143481434914350143511435214353143541435514356143571435814359143601436114362143631436414365143661436714368143691437014371143721437314374143751437614377143781437914380143811438214383143841438514386143871438814389143901439114392143931439414395143961439714398143991440014401144021440314404144051440614407144081440914410144111441214413144141441514416144171441814419144201442114422144231442414425144261442714428144291443014431144321443314434144351443614437144381443914440144411444214443144441444514446144471444814449144501445114452144531445414455144561445714458144591446014461144621446314464144651446614467144681446914470144711447214473144741447514476144771447814479144801448114482144831448414485144861448714488144891449014491144921449314494144951449614497144981449914500145011450214503145041450514506145071450814509145101451114512145131451414515145161451714518145191452014521145221452314524145251452614527145281452914530145311453214533145341453514536145371453814539145401454114542145431454414545145461454714548145491455014551145521455314554145551455614557145581455914560145611456214563145641456514566145671456814569145701457114572145731457414575145761457714578145791458014581145821458314584145851458614587145881458914590145911459214593145941459514596145971459814599146001460114602146031460414605146061460714608146091461014611146121461314614146151461614617146181461914620146211
46221462314624146251462614627146281462914630146311463214633146341463514636146371463814639146401464114642146431464414645146461464714648146491465014651146521465314654146551465614657146581465914660146611466214663146641466514666146671466814669146701467114672146731467414675146761467714678146791468014681146821468314684146851468614687146881468914690146911469214693146941469514696146971469814699147001470114702147031470414705147061470714708147091471014711147121471314714147151471614717147181471914720147211472214723147241472514726147271472814729147301473114732147331473414735147361473714738147391474014741147421474314744147451474614747147481474914750147511475214753147541475514756147571475814759147601476114762147631476414765147661476714768147691477014771147721477314774147751477614777147781477914780147811478214783147841478514786147871478814789147901479114792147931479414795147961479714798147991480014801148021480314804148051480614807148081480914810148111481214813148141481514816148171481814819148201482114822148231482414825148261482714828148291483014831148321483314834148351483614837148381483914840148411484214843148441484514846148471484814849148501485114852148531485414855148561485714858148591486014861148621486314864148651486614867148681486914870148711487214873148741487514876148771487814879148801488114882148831488414885148861488714888148891489014891148921489314894148951489614897148981489914900149011490214903149041490514906149071490814909149101491114912149131491414915149161491714918149191492014921149221492314924149251492614927149281492914930149311493214933149341493514936149371493814939149401494114942149431494414945149461494714948149491495014951149521495314954149551495614957149581495914960149611496214963149641496514966149671496814969149701497114972149731497414975149761497714978149791498014981149821498314984149851498614987149881498914990149911499214993149941499514996149971499814999150001500115002150031500415005150061500715008150091501015011150121501315014150151501615017150181501915020150211
50221502315024150251502615027150281502915030150311503215033150341503515036150371503815039150401504115042150431504415045150461504715048150491505015051150521505315054150551505615057150581505915060150611506215063150641506515066150671506815069150701507115072150731507415075150761507715078150791508015081150821508315084150851508615087150881508915090150911509215093150941509515096150971509815099151001510115102151031510415105151061510715108151091511015111151121511315114151151511615117151181511915120151211512215123151241512515126151271512815129151301513115132151331513415135151361513715138151391514015141151421514315144151451514615147151481514915150151511515215153151541515515156151571515815159151601516115162151631516415165151661516715168151691517015171151721517315174151751517615177151781517915180151811518215183151841518515186151871518815189151901519115192151931519415195151961519715198151991520015201152021520315204152051520615207152081520915210152111521215213152141521515216152171521815219152201522115222152231522415225152261522715228152291523015231152321523315234152351523615237152381523915240152411524215243152441524515246152471524815249152501525115252152531525415255152561525715258152591526015261152621526315264152651526615267152681526915270152711527215273152741527515276152771527815279152801528115282152831528415285152861528715288152891529015291152921529315294152951529615297152981529915300153011530215303153041530515306153071530815309153101531115312153131531415315153161531715318153191532015321153221532315324153251532615327153281532915330153311533215333153341533515336153371533815339153401534115342153431534415345153461534715348153491535015351153521535315354153551535615357153581535915360153611536215363153641536515366153671536815369153701537115372153731537415375153761537715378153791538015381153821538315384153851538615387153881538915390153911539215393153941539515396153971539815399154001540115402154031540415405154061540715408154091541015411154121541315414154151541615417154181541915420154211
54221542315424154251542615427154281542915430154311543215433154341543515436154371543815439154401544115442154431544415445154461544715448154491545015451154521545315454154551545615457154581545915460154611546215463154641546515466154671546815469154701547115472154731547415475154761547715478154791548015481154821548315484154851548615487154881548915490154911549215493154941549515496154971549815499155001550115502155031550415505155061550715508155091551015511155121551315514155151551615517155181551915520155211552215523155241552515526155271552815529155301553115532155331553415535155361553715538155391554015541155421554315544155451554615547155481554915550155511555215553155541555515556155571555815559155601556115562155631556415565155661556715568155691557015571155721557315574155751557615577155781557915580155811558215583155841558515586155871558815589155901559115592155931559415595155961559715598155991560015601156021560315604156051560615607156081560915610156111561215613156141561515616156171561815619156201562115622156231562415625156261562715628156291563015631156321563315634156351563615637156381563915640156411564215643156441564515646156471564815649156501565115652156531565415655156561565715658156591566015661156621566315664156651566615667156681566915670156711567215673156741567515676156771567815679156801568115682156831568415685156861568715688156891569015691156921569315694156951569615697156981569915700157011570215703157041570515706157071570815709157101571115712157131571415715157161571715718157191572015721157221572315724157251572615727157281572915730157311573215733157341573515736157371573815739157401574115742157431574415745157461574715748157491575015751157521575315754157551575615757157581575915760157611576215763157641576515766157671576815769157701577115772157731577415775157761577715778157791578015781157821578315784157851578615787157881578915790157911579215793157941579515796157971579815799158001580115802158031580415805158061580715808158091581015811158121581315814158151581615817158181581915820158211
58221582315824158251582615827158281582915830158311583215833158341583515836158371583815839158401584115842158431584415845158461584715848158491585015851158521585315854158551585615857158581585915860158611586215863158641586515866158671586815869158701587115872158731587415875158761587715878158791588015881158821588315884158851588615887158881588915890158911589215893158941589515896158971589815899159001590115902159031590415905159061590715908159091591015911159121591315914159151591615917159181591915920159211592215923159241592515926159271592815929159301593115932159331593415935159361593715938159391594015941159421594315944159451594615947159481594915950159511595215953159541595515956159571595815959159601596115962159631596415965159661596715968159691597015971159721597315974159751597615977159781597915980159811598215983159841598515986159871598815989159901599115992159931599415995159961599715998159991600016001160021600316004160051600616007160081600916010160111601216013160141601516016160171601816019160201602116022160231602416025160261602716028160291603016031160321603316034160351603616037160381603916040160411604216043160441604516046160471604816049160501605116052160531605416055160561605716058160591606016061160621606316064160651606616067160681606916070160711607216073160741607516076160771607816079160801608116082160831608416085160861608716088160891609016091160921609316094160951609616097160981609916100161011610216103161041610516106161071610816109161101611116112161131611416115161161611716118161191612016121161221612316124161251612616127161281612916130161311613216133161341613516136161371613816139161401614116142161431614416145161461614716148161491615016151161521615316154161551615616157161581615916160161611616216163161641616516166161671616816169161701617116172161731617416175161761617716178161791618016181161821618316184161851618616187161881618916190161911619216193161941619516196161971619816199162001620116202162031620416205162061620716208162091621016211162121621316214162151621616217162181621916220162211
62221622316224162251622616227162281622916230162311623216233162341623516236162371623816239162401624116242162431624416245162461624716248162491625016251162521625316254162551625616257162581625916260162611626216263162641626516266162671626816269162701627116272162731627416275162761627716278162791628016281162821628316284162851628616287162881628916290162911629216293162941629516296162971629816299163001630116302163031630416305163061630716308163091631016311163121631316314163151631616317163181631916320163211632216323163241632516326163271632816329163301633116332163331633416335163361633716338163391634016341163421634316344163451634616347163481634916350163511635216353163541635516356163571635816359163601636116362163631636416365163661636716368163691637016371163721637316374163751637616377163781637916380163811638216383163841638516386163871638816389163901639116392163931639416395163961639716398163991640016401164021640316404164051640616407164081640916410164111641216413164141641516416164171641816419164201642116422164231642416425164261642716428164291643016431164321643316434164351643616437164381643916440164411644216443164441644516446164471644816449164501645116452164531645416455164561645716458164591646016461164621646316464164651646616467164681646916470164711647216473164741647516476164771647816479164801648116482164831648416485164861648716488164891649016491164921649316494164951649616497164981649916500165011650216503165041650516506165071650816509165101651116512165131651416515165161651716518165191652016521165221652316524165251652616527165281652916530165311653216533165341653516536165371653816539165401654116542165431654416545165461654716548165491655016551165521655316554165551655616557165581655916560165611656216563165641656516566165671656816569165701657116572165731657416575165761657716578165791658016581165821658316584165851658616587165881658916590165911659216593165941659516596165971659816599166001660116602166031660416605166061660716608166091661016611166121661316614166151661616617166181661916620166211
66221662316624166251662616627166281662916630166311663216633166341663516636166371663816639166401664116642166431664416645166461664716648166491665016651166521665316654166551665616657166581665916660166611666216663166641666516666166671666816669166701667116672166731667416675166761667716678166791668016681166821668316684166851668616687166881668916690166911669216693166941669516696166971669816699167001670116702167031670416705167061670716708167091671016711167121671316714167151671616717167181671916720167211672216723167241672516726167271672816729167301673116732167331673416735167361673716738167391674016741167421674316744167451674616747167481674916750167511675216753167541675516756167571675816759167601676116762167631676416765167661676716768167691677016771167721677316774167751677616777167781677916780167811678216783167841678516786167871678816789167901679116792167931679416795167961679716798167991680016801168021680316804168051680616807168081680916810168111681216813168141681516816168171681816819168201682116822168231682416825168261682716828168291683016831168321683316834168351683616837168381683916840168411684216843168441684516846168471684816849168501685116852168531685416855168561685716858168591686016861168621686316864168651686616867168681686916870168711687216873168741687516876168771687816879168801688116882168831688416885168861688716888168891689016891168921689316894168951689616897168981689916900169011690216903169041690516906169071690816909169101691116912169131691416915169161691716918169191692016921169221692316924169251692616927169281692916930169311693216933169341693516936169371693816939169401694116942169431694416945169461694716948169491695016951169521695316954169551695616957169581695916960169611696216963169641696516966169671696816969169701697116972169731697416975169761697716978169791698016981169821698316984169851698616987169881698916990169911699216993169941699516996169971699816999170001700117002170031700417005170061700717008170091701017011170121701317014170151701617017170181701917020170211
70221702317024170251702617027170281702917030170311703217033170341703517036170371703817039170401704117042170431704417045170461704717048170491705017051170521705317054170551705617057170581705917060170611706217063170641706517066170671706817069170701707117072170731707417075170761707717078170791708017081170821708317084170851708617087170881708917090170911709217093170941709517096170971709817099171001710117102171031710417105171061710717108171091711017111171121711317114171151711617117171181711917120171211712217123171241712517126171271712817129171301713117132171331713417135171361713717138171391714017141171421714317144171451714617147171481714917150171511715217153171541715517156171571715817159171601716117162171631716417165171661716717168171691717017171171721717317174171751717617177171781717917180171811718217183171841718517186171871718817189171901719117192171931719417195171961719717198171991720017201172021720317204172051720617207172081720917210172111721217213172141721517216172171721817219172201722117222172231722417225172261722717228172291723017231172321723317234172351723617237172381723917240172411724217243172441724517246172471724817249172501725117252172531725417255172561725717258172591726017261172621726317264172651726617267172681726917270172711727217273172741727517276172771727817279172801728117282172831728417285172861728717288172891729017291172921729317294172951729617297172981729917300173011730217303173041730517306173071730817309173101731117312173131731417315173161731717318173191732017321173221732317324173251732617327173281732917330173311733217333173341733517336173371733817339173401734117342173431734417345173461734717348173491735017351173521735317354173551735617357173581735917360173611736217363173641736517366173671736817369173701737117372173731737417375173761737717378173791738017381173821738317384173851738617387173881738917390173911739217393173941739517396173971739817399174001740117402174031740417405174061740717408174091741017411174121741317414174151741617417174181741917420174211
74221742317424174251742617427174281742917430174311743217433174341743517436174371743817439174401744117442174431744417445174461744717448174491745017451174521745317454174551745617457174581745917460174611746217463174641746517466174671746817469174701747117472174731747417475174761747717478174791748017481174821748317484174851748617487174881748917490174911749217493174941749517496174971749817499175001750117502175031750417505175061750717508175091751017511175121751317514175151751617517175181751917520175211752217523175241752517526175271752817529175301753117532175331753417535175361753717538175391754017541175421754317544175451754617547175481754917550175511755217553175541755517556175571755817559175601756117562175631756417565175661756717568175691757017571175721757317574175751757617577175781757917580175811758217583175841758517586175871758817589175901759117592175931759417595175961759717598175991760017601176021760317604176051760617607176081760917610176111761217613176141761517616176171761817619176201762117622176231762417625176261762717628176291763017631176321763317634176351763617637176381763917640176411764217643176441764517646176471764817649176501765117652176531765417655176561765717658176591766017661176621766317664176651766617667176681766917670176711767217673176741767517676176771767817679176801768117682176831768417685176861768717688176891769017691176921769317694176951769617697176981769917700177011770217703177041770517706177071770817709177101771117712177131771417715177161771717718177191772017721177221772317724177251772617727177281772917730177311773217733177341773517736177371773817739177401774117742177431774417745177461774717748177491775017751177521775317754177551775617757177581775917760177611776217763177641776517766177671776817769177701777117772177731777417775177761777717778177791778017781177821778317784177851778617787177881778917790177911779217793177941779517796177971779817799178001780117802178031780417805178061780717808178091781017811178121781317814178151781617817178181781917820178211
78221782317824178251782617827178281782917830178311783217833178341783517836178371783817839178401784117842178431784417845178461784717848178491785017851178521785317854178551785617857178581785917860178611786217863178641786517866178671786817869178701787117872178731787417875178761787717878178791788017881178821788317884178851788617887178881788917890178911789217893178941789517896178971789817899179001790117902179031790417905179061790717908179091791017911179121791317914179151791617917179181791917920179211792217923179241792517926179271792817929179301793117932179331793417935179361793717938179391794017941179421794317944179451794617947179481794917950179511795217953179541795517956179571795817959179601796117962179631796417965179661796717968179691797017971179721797317974179751797617977179781797917980179811798217983179841798517986179871798817989179901799117992179931799417995179961799717998179991800018001180021800318004180051800618007180081800918010180111801218013180141801518016180171801818019180201802118022180231802418025180261802718028180291803018031180321803318034180351803618037180381803918040180411804218043180441804518046180471804818049180501805118052180531805418055180561805718058180591806018061180621806318064180651806618067180681806918070180711807218073180741807518076180771807818079180801808118082180831808418085180861808718088180891809018091180921809318094180951809618097180981809918100181011810218103181041810518106181071810818109181101811118112181131811418115181161811718118181191812018121181221812318124181251812618127181281812918130181311813218133181341813518136181371813818139181401814118142181431814418145181461814718148181491815018151181521815318154181551815618157181581815918160181611816218163181641816518166181671816818169181701817118172181731817418175181761817718178181791818018181181821818318184181851818618187181881818918190181911819218193181941819518196181971819818199182001820118202182031820418205182061820718208182091821018211182121821318214182151821618217182181821918220182211
82221822318224182251822618227182281822918230182311823218233182341823518236182371823818239182401824118242182431824418245182461824718248182491825018251182521825318254182551825618257182581825918260182611826218263182641826518266182671826818269182701827118272182731827418275182761827718278182791828018281182821828318284182851828618287182881828918290182911829218293182941829518296182971829818299183001830118302183031830418305183061830718308183091831018311183121831318314183151831618317183181831918320183211832218323183241832518326183271832818329183301833118332183331833418335183361833718338183391834018341183421834318344183451834618347183481834918350183511835218353183541835518356183571835818359183601836118362183631836418365183661836718368183691837018371183721837318374183751837618377183781837918380183811838218383183841838518386183871838818389183901839118392183931839418395183961839718398183991840018401184021840318404184051840618407184081840918410184111841218413184141841518416184171841818419184201842118422184231842418425184261842718428184291843018431184321843318434184351843618437184381843918440184411844218443184441844518446184471844818449184501845118452184531845418455184561845718458184591846018461184621846318464184651846618467184681846918470184711847218473184741847518476184771847818479184801848118482184831848418485184861848718488184891849018491184921849318494184951849618497184981849918500185011850218503185041850518506185071850818509185101851118512185131851418515185161851718518185191852018521185221852318524185251852618527185281852918530185311853218533185341853518536185371853818539185401854118542185431854418545185461854718548185491855018551185521855318554185551855618557185581855918560185611856218563185641856518566185671856818569185701857118572185731857418575185761857718578185791858018581185821858318584185851858618587185881858918590185911859218593185941859518596185971859818599186001860118602186031860418605186061860718608186091861018611186121861318614186151861618617186181861918620186211
86221862318624186251862618627186281862918630186311863218633186341863518636186371863818639186401864118642186431864418645186461864718648186491865018651186521865318654186551865618657186581865918660186611866218663186641866518666186671866818669186701867118672186731867418675186761867718678186791868018681186821868318684186851868618687186881868918690186911869218693186941869518696186971869818699187001870118702187031870418705187061870718708187091871018711187121871318714187151871618717187181871918720187211872218723187241872518726187271872818729187301873118732187331873418735187361873718738187391874018741187421874318744187451874618747187481874918750187511875218753187541875518756187571875818759187601876118762187631876418765187661876718768187691877018771187721877318774187751877618777187781877918780187811878218783187841878518786187871878818789187901879118792187931879418795187961879718798187991880018801188021880318804188051880618807188081880918810188111881218813188141881518816188171881818819188201882118822188231882418825188261882718828188291883018831188321883318834188351883618837188381883918840188411884218843188441884518846188471884818849188501885118852188531885418855188561885718858188591886018861188621886318864188651886618867188681886918870188711887218873188741887518876188771887818879188801888118882188831888418885188861888718888188891889018891188921889318894188951889618897188981889918900189011890218903189041890518906189071890818909189101891118912189131891418915189161891718918189191892018921189221892318924189251892618927189281892918930189311893218933189341893518936189371893818939189401894118942189431894418945189461894718948189491895018951189521895318954189551895618957189581895918960189611896218963189641896518966189671896818969189701897118972189731897418975189761897718978189791898018981189821898318984189851898618987189881898918990189911899218993189941899518996189971899818999190001900119002190031900419005190061900719008190091901019011190121901319014190151901619017190181901919020190211
90221902319024190251902619027190281902919030190311903219033190341903519036190371903819039190401904119042190431904419045190461904719048190491905019051190521905319054190551905619057190581905919060190611906219063190641906519066190671906819069190701907119072190731907419075190761907719078190791908019081190821908319084190851908619087190881908919090190911909219093190941909519096190971909819099191001910119102191031910419105191061910719108191091911019111191121911319114191151911619117191181911919120191211912219123191241912519126191271912819129191301913119132191331913419135191361913719138191391914019141191421914319144191451914619147191481914919150191511915219153191541915519156191571915819159191601916119162191631916419165191661916719168191691917019171191721917319174191751917619177191781917919180191811918219183191841918519186191871918819189191901919119192191931919419195191961919719198191991920019201192021920319204192051920619207192081920919210192111921219213192141921519216192171921819219192201922119222192231922419225192261922719228192291923019231192321923319234192351923619237192381923919240192411924219243192441924519246192471924819249192501925119252192531925419255192561925719258192591926019261192621926319264192651926619267192681926919270192711927219273192741927519276192771927819279192801928119282192831928419285192861928719288192891929019291192921929319294192951929619297192981929919300193011930219303193041930519306193071930819309193101931119312193131931419315193161931719318193191932019321193221932319324193251932619327193281932919330193311933219333193341933519336193371933819339193401934119342193431934419345193461934719348193491935019351193521935319354193551935619357193581935919360193611936219363193641936519366193671936819369193701937119372193731937419375193761937719378193791938019381193821938319384193851938619387193881938919390193911939219393193941939519396193971939819399194001940119402194031940419405194061940719408194091941019411194121941319414194151941619417194181941919420194211
94221942319424194251942619427194281942919430194311943219433194341943519436194371943819439194401944119442194431944419445194461944719448194491945019451194521945319454194551945619457194581945919460194611946219463194641946519466194671946819469194701947119472194731947419475194761947719478194791948019481194821948319484194851948619487194881948919490194911949219493194941949519496194971949819499195001950119502195031950419505195061950719508195091951019511195121951319514195151951619517195181951919520195211952219523195241952519526195271952819529195301953119532195331953419535195361953719538195391954019541195421954319544195451954619547195481954919550195511955219553195541955519556195571955819559195601956119562195631956419565195661956719568195691957019571195721957319574195751957619577195781957919580195811958219583195841958519586195871958819589195901959119592195931959419595195961959719598195991960019601196021960319604196051960619607196081960919610196111961219613196141961519616196171961819619196201962119622196231962419625196261962719628196291963019631196321963319634196351963619637196381963919640196411964219643196441964519646196471964819649196501965119652196531965419655196561965719658196591966019661196621966319664196651966619667196681966919670196711967219673196741967519676196771967819679196801968119682196831968419685196861968719688196891969019691196921969319694196951969619697196981969919700197011970219703197041970519706197071970819709197101971119712197131971419715197161971719718197191972019721197221972319724197251972619727197281972919730197311973219733197341973519736197371973819739197401974119742197431974419745197461974719748197491975019751197521975319754197551975619757197581975919760197611976219763197641976519766197671976819769197701977119772197731977419775197761977719778197791978019781197821978319784197851978619787197881978919790197911979219793197941979519796197971979819799198001980119802198031980419805198061980719808198091981019811198121981319814198151981619817198181981919820198211
98221982319824198251982619827198281982919830198311983219833198341983519836198371983819839198401984119842198431984419845198461984719848198491985019851198521985319854198551985619857198581985919860198611986219863198641986519866198671986819869198701987119872198731987419875198761987719878198791988019881198821988319884198851988619887198881988919890198911989219893198941989519896198971989819899199001990119902199031990419905199061990719908199091991019911199121991319914199151991619917199181991919920199211992219923199241992519926199271992819929199301993119932199331993419935199361993719938199391994019941199421994319944199451994619947199481994919950199511995219953199541995519956199571995819959199601996119962199631996419965199661996719968199691997019971199721997319974199751997619977199781997919980199811998219983199841998519986199871998819989199901999119992199931999419995199961999719998199992000020001200022000320004200052000620007200082000920010200112001220013200142001520016200172001820019200202002120022200232002420025200262002720028200292003020031200322003320034200352003620037200382003920040200412004220043200442004520046200472004820049200502005120052200532005420055200562005720058200592006020061200622006320064200652006620067200682006920070200712007220073200742007520076200772007820079200802008120082200832008420085200862008720088200892009020091200922009320094200952009620097200982009920100201012010220103201042010520106201072010820109201102011120112201132011420115201162011720118201192012020121201222012320124201252012620127201282012920130201312013220133201342013520136201372013820139201402014120142201432014420145201462014720148201492015020151201522015320154201552015620157201582015920160201612016220163201642016520166201672016820169201702017120172201732017420175201762017720178201792018020181201822018320184201852018620187201882018920190201912019220193201942019520196201972019820199202002020120202202032020420205202062020720208202092021020211202122021320214202152021620217202182021920220202212
02222022320224202252022620227202282022920230202312023220233202342023520236202372023820239202402024120242202432024420245202462024720248202492025020251202522025320254202552025620257202582025920260202612026220263202642026520266202672026820269202702027120272202732027420275202762027720278202792028020281202822028320284202852028620287202882028920290202912029220293202942029520296202972029820299203002030120302203032030420305203062030720308203092031020311203122031320314203152031620317203182031920320203212032220323203242032520326203272032820329203302033120332203332033420335203362033720338203392034020341203422034320344203452034620347203482034920350203512035220353203542035520356203572035820359203602036120362203632036420365203662036720368203692037020371203722037320374203752037620377203782037920380203812038220383203842038520386203872038820389203902039120392203932039420395203962039720398203992040020401204022040320404204052040620407204082040920410204112041220413204142041520416204172041820419204202042120422204232042420425204262042720428204292043020431204322043320434204352043620437204382043920440204412044220443204442044520446204472044820449204502045120452204532045420455204562045720458204592046020461204622046320464204652046620467204682046920470204712047220473204742047520476204772047820479204802048120482204832048420485204862048720488204892049020491204922049320494204952049620497204982049920500205012050220503205042050520506205072050820509205102051120512205132051420515205162051720518205192052020521205222052320524205252052620527205282052920530205312053220533205342053520536205372053820539205402054120542205432054420545205462054720548205492055020551205522055320554205552055620557205582055920560205612056220563205642056520566205672056820569205702057120572205732057420575205762057720578205792058020581205822058320584205852058620587205882058920590205912059220593205942059520596205972059820599206002060120602206032060420605206062060720608206092061020611206122061320614206152061620617206182061920620206212
06222062320624206252062620627206282062920630206312063220633206342063520636206372063820639206402064120642206432064420645206462064720648206492065020651206522065320654206552065620657206582065920660206612066220663206642066520666206672066820669206702067120672206732067420675206762067720678206792068020681206822068320684206852068620687206882068920690206912069220693206942069520696206972069820699207002070120702207032070420705207062070720708207092071020711207122071320714207152071620717207182071920720207212072220723207242072520726207272072820729207302073120732207332073420735207362073720738207392074020741207422074320744207452074620747207482074920750207512075220753207542075520756207572075820759207602076120762207632076420765207662076720768207692077020771207722077320774207752077620777207782077920780207812078220783207842078520786207872078820789207902079120792207932079420795207962079720798207992080020801208022080320804208052080620807208082080920810208112081220813208142081520816208172081820819208202082120822208232082420825208262082720828208292083020831208322083320834208352083620837208382083920840208412084220843208442084520846208472084820849208502085120852208532085420855208562085720858208592086020861208622086320864208652086620867208682086920870208712087220873208742087520876208772087820879208802088120882208832088420885208862088720888208892089020891208922089320894208952089620897208982089920900209012090220903209042090520906209072090820909209102091120912209132091420915209162091720918209192092020921209222092320924209252092620927209282092920930209312093220933209342093520936209372093820939209402094120942209432094420945209462094720948209492095020951209522095320954209552095620957209582095920960209612096220963209642096520966209672096820969209702097120972209732097420975209762097720978209792098020981209822098320984209852098620987209882098920990209912099220993209942099520996209972099820999210002100121002210032100421005210062100721008210092101021011210122101321014210152101621017210182101921020210212
10222102321024210252102621027210282102921030210312103221033210342103521036210372103821039210402104121042210432104421045210462104721048210492105021051210522105321054210552105621057210582105921060210612106221063210642106521066210672106821069210702107121072210732107421075210762107721078210792108021081210822108321084210852108621087210882108921090210912109221093210942109521096210972109821099211002110121102211032110421105211062110721108211092111021111211122111321114211152111621117211182111921120211212112221123211242112521126211272112821129211302113121132211332113421135211362113721138211392114021141211422114321144211452114621147211482114921150211512115221153211542115521156211572115821159211602116121162211632116421165211662116721168211692117021171211722117321174211752117621177211782117921180211812118221183211842118521186211872118821189211902119121192211932119421195211962119721198211992120021201212022120321204212052120621207212082120921210212112121221213212142121521216212172121821219212202122121222212232122421225212262122721228212292123021231212322123321234212352123621237212382123921240212412124221243212442124521246212472124821249212502125121252212532125421255212562125721258212592126021261212622126321264212652126621267212682126921270212712127221273212742127521276212772127821279212802128121282212832128421285212862128721288212892129021291212922129321294212952129621297212982129921300213012130221303213042130521306213072130821309213102131121312213132131421315213162131721318213192132021321213222132321324213252132621327213282132921330213312133221333213342133521336213372133821339213402134121342213432134421345213462134721348213492135021351213522135321354213552135621357213582135921360213612136221363213642136521366213672136821369213702137121372213732137421375213762137721378213792138021381213822138321384213852138621387213882138921390213912139221393213942139521396213972139821399214002140121402214032140421405214062140721408214092141021411214122141321414214152141621417214182141921420214212
14222142321424214252142621427214282142921430214312143221433214342143521436214372143821439214402144121442214432144421445214462144721448214492145021451214522145321454214552145621457214582145921460214612146221463214642146521466214672146821469214702147121472214732147421475214762147721478214792148021481214822148321484214852148621487214882148921490214912149221493214942149521496214972149821499215002150121502215032150421505215062150721508215092151021511215122151321514215152151621517215182151921520215212152221523215242152521526215272152821529215302153121532215332153421535215362153721538215392154021541215422154321544215452154621547215482154921550215512155221553215542155521556215572155821559215602156121562215632156421565215662156721568215692157021571215722157321574215752157621577215782157921580215812158221583215842158521586215872158821589215902159121592215932159421595215962159721598215992160021601216022160321604216052160621607216082160921610216112161221613216142161521616216172161821619216202162121622216232162421625216262162721628216292163021631216322163321634216352163621637216382163921640216412164221643216442164521646216472164821649216502165121652216532165421655216562165721658216592166021661216622166321664216652166621667216682166921670216712167221673216742167521676216772167821679216802168121682216832168421685216862168721688216892169021691216922169321694216952169621697216982169921700217012170221703217042170521706217072170821709217102171121712217132171421715217162171721718217192172021721217222172321724217252172621727217282172921730217312173221733217342173521736217372173821739217402174121742217432174421745217462174721748217492175021751217522175321754217552175621757217582175921760217612176221763217642176521766217672176821769217702177121772217732177421775217762177721778217792178021781217822178321784217852178621787217882178921790217912179221793217942179521796217972179821799218002180121802218032180421805218062180721808218092181021811218122181321814218152181621817218182181921820218212
18222182321824218252182621827218282182921830218312183221833218342183521836218372183821839218402184121842218432184421845218462184721848218492185021851218522185321854218552185621857218582185921860218612186221863218642186521866218672186821869218702187121872218732187421875218762187721878218792188021881218822188321884218852188621887218882188921890218912189221893218942189521896218972189821899219002190121902219032190421905219062190721908219092191021911219122191321914219152191621917219182191921920219212192221923219242192521926219272192821929219302193121932219332193421935219362193721938219392194021941219422194321944219452194621947219482194921950219512195221953219542195521956219572195821959219602196121962219632196421965219662196721968219692197021971219722197321974219752197621977219782197921980219812198221983219842198521986219872198821989219902199121992219932199421995219962199721998219992200022001220022200322004220052200622007220082200922010220112201222013220142201522016220172201822019220202202122022220232202422025220262202722028220292203022031220322203322034220352203622037220382203922040220412204222043220442204522046220472204822049220502205122052220532205422055220562205722058220592206022061220622206322064220652206622067220682206922070220712207222073220742207522076220772207822079220802208122082220832208422085220862208722088220892209022091220922209322094220952209622097220982209922100221012210222103221042210522106221072210822109221102211122112221132211422115221162211722118221192212022121221222212322124221252212622127221282212922130221312213222133221342213522136221372213822139221402214122142221432214422145221462214722148221492215022151221522215322154221552215622157221582215922160221612216222163221642216522166221672216822169221702217122172221732217422175221762217722178221792218022181221822218322184221852218622187221882218922190221912219222193221942219522196221972219822199222002220122202222032220422205222062220722208222092221022211222122221322214222152221622217222182221922220222212
22222222322224222252222622227222282222922230222312223222233222342223522236222372223822239222402224122242222432224422245222462224722248222492225022251222522225322254222552225622257222582225922260222612226222263222642226522266222672226822269222702227122272222732227422275222762227722278222792228022281222822228322284222852228622287222882228922290222912229222293222942229522296222972229822299223002230122302223032230422305223062230722308223092231022311223122231322314223152231622317223182231922320223212232222323223242232522326223272232822329223302233122332223332233422335223362233722338223392234022341223422234322344223452234622347223482234922350223512235222353223542235522356223572235822359223602236122362223632236422365223662236722368223692237022371223722237322374223752237622377223782237922380223812238222383223842238522386223872238822389223902239122392223932239422395223962239722398223992240022401224022240322404224052240622407224082240922410224112241222413224142241522416224172241822419224202242122422224232242422425224262242722428224292243022431224322243322434224352243622437224382243922440224412244222443224442244522446224472244822449224502245122452224532245422455224562245722458224592246022461224622246322464224652246622467224682246922470224712247222473224742247522476224772247822479224802248122482224832248422485224862248722488224892249022491224922249322494224952249622497224982249922500225012250222503225042250522506225072250822509225102251122512225132251422515225162251722518225192252022521225222252322524225252252622527225282252922530225312253222533225342253522536225372253822539225402254122542225432254422545225462254722548225492255022551225522255322554225552255622557225582255922560225612256222563225642256522566225672256822569225702257122572225732257422575225762257722578225792258022581225822258322584225852258622587225882258922590225912259222593225942259522596225972259822599226002260122602226032260422605226062260722608226092261022611226122261322614226152261622617226182261922620226212
26222262322624226252262622627226282262922630226312263222633226342263522636226372263822639226402264122642226432264422645226462264722648226492265022651226522265322654226552265622657226582265922660226612266222663226642266522666226672266822669226702267122672226732267422675226762267722678226792268022681226822268322684226852268622687226882268922690226912269222693226942269522696226972269822699227002270122702227032270422705227062270722708227092271022711227122271322714227152271622717227182271922720227212272222723227242272522726227272272822729227302273122732227332273422735227362273722738227392274022741227422274322744227452274622747227482274922750227512275222753227542275522756227572275822759227602276122762227632276422765227662276722768227692277022771227722277322774227752277622777227782277922780227812278222783227842278522786227872278822789227902279122792227932279422795227962279722798227992280022801228022280322804228052280622807228082280922810228112281222813228142281522816228172281822819228202282122822228232282422825228262282722828228292283022831228322283322834228352283622837228382283922840228412284222843228442284522846228472284822849228502285122852228532285422855228562285722858228592286022861228622286322864228652286622867228682286922870228712287222873228742287522876228772287822879228802288122882228832288422885228862288722888228892289022891228922289322894228952289622897228982289922900229012290222903229042290522906229072290822909229102291122912229132291422915229162291722918229192292022921229222292322924229252292622927229282292922930229312293222933229342293522936229372293822939229402294122942229432294422945229462294722948229492295022951229522295322954229552295622957229582295922960229612296222963229642296522966229672296822969229702297122972229732297422975229762297722978229792298022981229822298322984229852298622987229882298922990229912299222993229942299522996229972299822999230002300123002230032300423005230062300723008230092301023011230122301323014230152301623017230182301923020230212
30222302323024230252302623027230282302923030230312303223033230342303523036230372303823039230402304123042230432304423045230462304723048230492305023051230522305323054230552305623057230582305923060230612306223063230642306523066230672306823069230702307123072230732307423075230762307723078230792308023081230822308323084230852308623087230882308923090230912309223093230942309523096230972309823099231002310123102231032310423105231062310723108231092311023111231122311323114231152311623117231182311923120231212312223123231242312523126231272312823129231302313123132231332313423135231362313723138231392314023141231422314323144231452314623147231482314923150231512315223153231542315523156231572315823159231602316123162231632316423165231662316723168231692317023171231722317323174231752317623177231782317923180231812318223183231842318523186231872318823189231902319123192231932319423195231962319723198231992320023201232022320323204232052320623207232082320923210232112321223213232142321523216232172321823219232202322123222232232322423225232262322723228232292323023231232322323323234232352323623237232382323923240232412324223243232442324523246232472324823249232502325123252232532325423255232562325723258232592326023261232622326323264232652326623267232682326923270232712327223273232742327523276232772327823279232802328123282232832328423285232862328723288232892329023291232922329323294232952329623297232982329923300233012330223303233042330523306233072330823309233102331123312233132331423315233162331723318233192332023321233222332323324233252332623327233282332923330233312333223333233342333523336233372333823339233402334123342233432334423345233462334723348233492335023351233522335323354233552335623357233582335923360233612336223363233642336523366233672336823369233702337123372233732337423375233762337723378233792338023381233822338323384233852338623387233882338923390233912339223393233942339523396233972339823399234002340123402234032340423405234062340723408234092341023411234122341323414234152341623417234182341923420234212
34222342323424234252342623427234282342923430234312343223433234342343523436234372343823439234402344123442234432344423445234462344723448234492345023451234522345323454234552345623457234582345923460234612346223463234642346523466234672346823469234702347123472234732347423475234762347723478234792348023481234822348323484234852348623487234882348923490234912349223493234942349523496234972349823499235002350123502235032350423505235062350723508235092351023511235122351323514235152351623517235182351923520235212352223523235242352523526235272352823529235302353123532235332353423535235362353723538235392354023541235422354323544235452354623547235482354923550235512355223553235542355523556235572355823559235602356123562235632356423565235662356723568235692357023571235722357323574235752357623577235782357923580235812358223583235842358523586235872358823589235902359123592235932359423595235962359723598235992360023601236022360323604236052360623607236082360923610236112361223613236142361523616236172361823619236202362123622236232362423625236262362723628236292363023631236322363323634236352363623637236382363923640236412364223643236442364523646236472364823649236502365123652236532365423655236562365723658236592366023661236622366323664236652366623667236682366923670236712367223673236742367523676236772367823679236802368123682236832368423685236862368723688236892369023691236922369323694236952369623697236982369923700237012370223703237042370523706237072370823709237102371123712237132371423715237162371723718237192372023721237222372323724237252372623727237282372923730237312373223733237342373523736237372373823739237402374123742237432374423745237462374723748237492375023751237522375323754237552375623757237582375923760237612376223763237642376523766237672376823769237702377123772237732377423775237762377723778237792378023781237822378323784237852378623787237882378923790237912379223793237942379523796237972379823799238002380123802238032380423805238062380723808238092381023811238122381323814238152381623817238182381923820238212
38222382323824238252382623827238282382923830238312383223833238342383523836238372383823839238402384123842238432384423845238462384723848238492385023851238522385323854238552385623857238582385923860238612386223863238642386523866238672386823869238702387123872238732387423875238762387723878238792388023881238822388323884238852388623887238882388923890238912389223893238942389523896238972389823899239002390123902239032390423905239062390723908239092391023911239122391323914239152391623917239182391923920239212392223923239242392523926239272392823929239302393123932239332393423935239362393723938239392394023941239422394323944239452394623947239482394923950239512395223953239542395523956239572395823959239602396123962239632396423965239662396723968239692397023971239722397323974239752397623977239782397923980239812398223983239842398523986239872398823989239902399123992239932399423995239962399723998239992400024001240022400324004240052400624007240082400924010240112401224013240142401524016240172401824019240202402124022240232402424025240262402724028240292403024031240322403324034240352403624037240382403924040240412404224043240442404524046240472404824049240502405124052240532405424055240562405724058240592406024061240622406324064240652406624067240682406924070240712407224073240742407524076240772407824079240802408124082240832408424085240862408724088240892409024091240922409324094240952409624097240982409924100241012410224103241042410524106241072410824109241102411124112241132411424115241162411724118241192412024121241222412324124241252412624127241282412924130241312413224133241342413524136241372413824139241402414124142241432414424145241462414724148241492415024151241522415324154241552415624157241582415924160241612416224163241642416524166241672416824169241702417124172241732417424175241762417724178241792418024181241822418324184241852418624187241882418924190241912419224193241942419524196241972419824199242002420124202242032420424205242062420724208242092421024211242122421324214242152421624217242182421924220242212
42222422324224242252422624227242282422924230242312423224233242342423524236242372423824239242402424124242242432424424245242462424724248242492425024251242522425324254242552425624257242582425924260242612426224263242642426524266242672426824269242702427124272242732427424275242762427724278242792428024281242822428324284242852428624287242882428924290242912429224293242942429524296242972429824299243002430124302243032430424305243062430724308243092431024311243122431324314243152431624317243182431924320243212432224323243242432524326243272432824329243302433124332243332433424335243362433724338243392434024341243422434324344243452434624347243482434924350243512435224353243542435524356243572435824359243602436124362243632436424365243662436724368243692437024371243722437324374243752437624377243782437924380243812438224383243842438524386243872438824389243902439124392243932439424395243962439724398243992440024401244022440324404244052440624407244082440924410244112441224413244142441524416244172441824419244202442124422244232442424425244262442724428244292443024431244322443324434244352443624437244382443924440244412444224443244442444524446244472444824449244502445124452244532445424455244562445724458244592446024461244622446324464244652446624467244682446924470244712447224473244742447524476244772447824479244802448124482244832448424485244862448724488244892449024491244922449324494244952449624497244982449924500245012450224503245042450524506245072450824509245102451124512245132451424515245162451724518245192452024521245222452324524245252452624527245282452924530245312453224533245342453524536245372453824539245402454124542245432454424545245462454724548245492455024551245522455324554245552455624557245582455924560245612456224563245642456524566245672456824569245702457124572245732457424575245762457724578245792458024581245822458324584245852458624587245882458924590245912459224593245942459524596245972459824599246002460124602246032460424605246062460724608246092461024611246122461324614246152461624617246182461924620246212
4622246232462424625246262462724628246292463024631246322463324634246352463624637246382463924640246412464224643246442464524646246472464824649246502465124652246532465424655246562465724658246592466024661246622466324664246652466624667246682466924670246712467224673246742467524676246772467824679246802468124682246832468424685246862468724688246892469024691246922469324694246952469624697246982469924700247012470224703247042470524706247072470824709247102471124712247132471424715247162471724718247192472024721247222472324724247252472624727247282472924730247312473224733247342473524736247372473824739247402474124742247432474424745247462474724748247492475024751247522475324754247552475624757247582475924760247612476224763247642476524766247672476824769247702477124772247732477424775247762477724778247792478024781247822478324784247852478624787247882478924790247912479224793247942479524796247972479824799248002480124802248032480424805248062480724808248092481024811248122481324814248152481624817248182481924820248212482224823248242482524826248272482824829248302483124832248332483424835248362483724838248392484024841248422484324844248452484624847248482484924850248512485224853248542485524856248572485824859248602486124862248632486424865248662486724868248692487024871248722487324874248752487624877248782487924880248812488224883248842488524886248872488824889248902489124892248932489424895248962489724898248992490024901249022490324904249052490624907249082490924910249112491224913249142491524916249172491824919249202492124922249232492424925249262492724928249292493024931249322493324934
  1. /*
  2. * Copyright (c) 2019 Nuclei Limited. All rights reserved.
  3. *
  4. * SPDX-License-Identifier: Apache-2.0
  5. *
  6. * Licensed under the Apache License, Version 2.0 (the License); you may
  7. * not use this file except in compliance with the License.
  8. * You may obtain a copy of the License at
  9. *
  10. * www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  14. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. #ifndef __CORE_FEATURE_DSP__
  19. #define __CORE_FEATURE_DSP__
  20. /*!
  21. * @file core_feature_dsp.h
  22. * @brief DSP feature API header file for Nuclei N/NX Core
  23. */
  24. /*
  25. * DSP Feature Configuration Macro:
  26. * 1. __DSP_PRESENT: Define whether Digital Signal Processing Unit (DSP) is present or not
  27. * * 0: Not present
  28. * * 1: Present
  29. */
  30. #ifdef __cplusplus
  31. extern "C" {
  32. #endif
  33. #include "core_feature_base.h"
  34. #if defined(__DSP_PRESENT) && (__DSP_PRESENT == 1)
  35. #if defined(__INC_INTRINSIC_API) && (__INC_INTRINSIC_API == 1)
  36. #ifndef __ICCRISCV__
  37. #include <rvp_intrinsic.h>
  38. #endif
  39. #endif
  40. #ifndef __ICCRISCV__
  41. /* ########################### CPU SIMD DSP Intrinsic Functions ########################### */
  42. /**
  43. * \defgroup NMSIS_Core_DSP_Intrinsic Intrinsic Functions for SIMD Instructions
  44. * \ingroup NMSIS_Core
  45. * \brief Functions that generate RISC-V DSP SIMD instructions.
  46. * \details
  47. *
  48. * The following functions generate specified RISC-V SIMD instructions that cannot be directly accessed by compiler.
  49. * * **DSP ISA Extension Instruction Summary**
  50. * + **Shorthand Definitions**
  51. * - r.H == r.H1: r[31:16], r.L == r.H0: r[15:0]
  52. * - r.B3: r[31:24], r.B2: r[23:16], r.B1: r[15:8], r.B0: r[7:0]
  53. * - r.B[x]: r[(x*8+7):(x*8+0)]
  54. * - r.H[x]: r[(x*16+15):(x*16+0)]
  55. * - r.W[x]: r[(x*32+31):(x*32+0)]
  56. * - r[xU]: the upper 32-bit of a 64-bit number; xU represents the GPR number that contains this upper part 32-bit value.
  57. * - r[xL]: the lower 32-bit of a 64-bit number; xL represents the GPR number that contains this lower part 32-bit value.
  58. * - r[xU].r[xL]: a 64-bit number that is formed from a pair of GPRs.
  59. * - s>>: signed arithmetic right shift
  60. * - u>>: unsigned logical right shift
  61. * - SAT.Qn(): Saturate to the range of [-2^n, 2^n-1], if saturation happens, set PSW.OV.
  62. * - SAT.Um(): Saturate to the range of [0, 2^m-1], if saturation happens, set PSW.OV.
  63. * - RUND(): Indicate `rounding`, i.e., add 1 to the most significant discarded bit for right shift or MSW-type multiplication instructions.
  64. * - Sign or Zero Extending functions:
  65. * - SEm(data): Sign-Extend data to m-bit.
  66. * - ZEm(data): Zero-Extend data to m-bit.
  67. * - ABS(x): Calculate the absolute value of `x`.
  68. * - CONCAT(x,y): Concatenate `x` and `y` to form a value.
  69. * - u<: Unsigned less than comparison.
  70. * - u<=: Unsigned less than or equal comparison.
  71. * - u>: Unsigned greater than comparison.
  72. * - s*: Signed multiplication.
  73. * - u*: Unsigned multiplication.
  74. *
  75. * @{
  76. */
  77. /** @} */ /* End of Doxygen Group NMSIS_Core_DSP_Intrinsic */
  78. /**
  79. * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS SIMD Data Processing Instructions
  80. * \ingroup NMSIS_Core_DSP_Intrinsic
  81. * \brief SIMD Data Processing Instructions
  82. * \details
  83. */
  84. /**
  85. * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB SIMD 16-bit Add/Subtract Instructions
  86. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
  87. * \brief SIMD 16-bit Add/Subtract Instructions
  88. * \details
  89. * Based on the combination of the types of the two 16-bit arithmetic operations, the SIMD 16-bit
  90. * add/subtract instructions can be classified into 6 main categories: Addition (two 16-bit additions),
  91. * Subtraction (two 16-bit subtractions), Crossed Add & Sub (one addition and one subtraction),
  92. * Crossed Sub & Add (one subtraction and one addition), Straight Add & Sub (one addition and one
  93. * subtraction), and Straight Sub & Add (one subtraction and one addition).
  94. * Based on the way of how an overflow condition is handled, the SIMD 16-bit add/subtract
  95. * instructions can be classified into 5 groups: Wrap-around (dropping overflow), Signed Halving
  96. * (keeping overflow by dropping 1 LSB bit), Unsigned Halving, Signed Saturation (clipping overflow),
  97. * and Unsigned Saturation.
  98. * Together, there are 30 SIMD 16-bit add/subtract instructions.
  99. */
  100. /**
  101. * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB SIMD 8-bit Addition & Subtraction Instructions
  102. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
  103. * \brief SIMD 8-bit Addition & Subtraction Instructions
  104. * \details
  105. * Based on the types of the four 8-bit arithmetic operations, the SIMD 8-bit add/subtract instructions
  106. * can be classified into 2 main categories: Addition (four 8-bit addition), and Subtraction (four 8-bit
  107. * subtraction).
  108. * Based on the way of how an overflow condition is handled for signed or unsigned operation, the
  109. * SIMD 8-bit add/subtract instructions can be classified into 5 groups: Wrap-around (dropping
  110. * overflow), Signed Halving (keeping overflow by dropping 1 LSB bit), Unsigned Halving, Signed
  111. * Saturation (clipping overflow), and Unsigned Saturation.
  112. * Together, there are 10 SIMD 8-bit add/subtract instructions.
  113. */
  114. /**
  115. * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT SIMD 16-bit Shift Instructions
  116. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
  117. * \brief SIMD 16-bit Shift Instructions
  118. * \details
  119. * There are 14 SIMD 16-bit shift instructions.
  120. */
  121. /**
  122. * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT SIMD 8-bit Shift Instructions
  123. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
  124. * \brief SIMD 8-bit Shift Instructions
  125. * \details
  126. * There are 14 SIMD 8-bit shift instructions.
  127. */
  128. /**
  129. * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP SIMD 16-bit Compare Instructions
  130. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
  131. * \brief SIMD 16-bit Compare Instructions
  132. * \details
  133. * There are 5 SIMD 16-bit Compare instructions.
  134. */
  135. /**
  136. * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP SIMD 8-bit Compare Instructions
  137. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
  138. * \brief SIMD 8-bit Compare Instructions
  139. * \details
  140. * There are 5 SIMD 8-bit Compare instructions.
  141. */
  142. /**
  143. * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY SIMD 16-bit Multiply Instructions
  144. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
  145. * \brief SIMD 16-bit Multiply Instructions
  146. * \details
  147. * There are 6 SIMD 16-bit Multiply instructions.
  148. */
  149. /**
  150. * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY SIMD 8-bit Multiply Instructions
  151. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
  152. * \brief SIMD 8-bit Multiply Instructions
  153. * \details
  154. * There are 6 SIMD 8-bit Multiply instructions.
  155. */
  156. /**
  157. * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC SIMD 16-bit Miscellaneous Instructions
  158. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
  159. * \brief SIMD 16-bit Miscellaneous Instructions
  160. * \details
  161. * There are 10 SIMD 16-bit Miscellaneous instructions.
  162. */
  163. /**
  164. * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC SIMD 8-bit Miscellaneous Instructions
  165. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
  166. * \brief SIMD 8-bit Miscellaneous Instructions
  167. * \details
  168. * There are 10 SIMD 8-bit Miscellaneous instructions.
  169. */
  170. /**
  171. * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK SIMD 8-bit Unpacking Instructions
  172. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
  173. * \brief SIMD 8-bit Unpacking Instructions
  174. * \details
  175. * There are 8 SIMD 8-bit Unpacking instructions.
  176. */
  177. /**
  178. * \defgroup NMSIS_Core_DSP_Intrinsic_NON_SIMD Non-SIMD Instructions
  179. * \ingroup NMSIS_Core_DSP_Intrinsic
  180. * \brief Non-SIMD Instructions
  181. * \details
  182. */
  183. /**
  184. * \defgroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU Non-SIMD Q15 saturation ALU Instructions
  185. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD
  186. * \brief Non-SIMD Q15 saturation ALU Instructions
  187. * \details
  188. * There are 7 Non-SIMD Q15 saturation ALU Instructions.
  189. */
  190. /**
  191. * \defgroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU Non-SIMD Q31 saturation ALU Instructions
  192. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD
  193. * \brief Non-SIMD Q31 saturation ALU Instructions
  194. * \details
  195. * This group contains the Non-SIMD Q31 saturation ALU Instructions.
  196. */
  197. /**
  198. * \defgroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION 32-bit Computation Instructions
  199. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD
  200. * \brief 32-bit Computation Instructions
  201. * \details
  202. * There are 8 32-bit Computation Instructions.
  203. */
  204. /**
  205. * \defgroup NMSIS_Core_DSP_Intrinsic_OV_FLAG_SC OV (Overflow) flag Set/Clear Instructions
  206. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD
  207. * \brief OV (Overflow) flag Set/Clear Instructions
  208. * \details
  209. * The following table lists the user instructions related to Overflow (OV) flag manipulation. There are 2 OV (Overflow) flag Set/Clear Instructions.
  210. */
  211. /**
  212. * \defgroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC Non-SIMD Miscellaneous Instructions
  213. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD
  214. * \brief Non-SIMD Miscellaneous Instructions
  215. * \details
  216. * There are 13 Miscellaneous Instructions here.
  217. */
  218. /**
  219. * \defgroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS Partial-SIMD Data Processing Instructions
  220. * \ingroup NMSIS_Core_DSP_Intrinsic
  221. * \brief Partial-SIMD Data Processing Instructions
  222. * \details
  223. */
  224. /**
  225. * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_PACK SIMD 16-bit Packing Instructions
  226. * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS
  227. * \brief SIMD 16-bit Packing Instructions
  228. * \details
  229. * There are 4 SIMD 16-bit Packing Instructions.
  230. */
  231. /**
  232. * \defgroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC Signed MSW 32x32 Multiply and Add Instructions
  233. * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS
  234. * \brief Signed MSW 32x32 Multiply and Add Instructions
  235. * \details
  236. * There are 8 Signed MSW 32x32 Multiply and Add Instructions.
  237. */
  238. /**
  239. * \defgroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC Signed MSW 32x16 Multiply and Add Instructions
  240. * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS
  241. * \brief Signed MSW 32x16 Multiply and Add Instructions
  242. * \details
  243. * There are 15 Signed MSW 32x16 Multiply and Add Instructions.
  244. */
  245. /**
  246. * \defgroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB Signed 16-bit Multiply 32-bit Add/Subtract Instructions
  247. * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS
  248. * \brief Signed 16-bit Multiply 32-bit Add/Subtract Instructions
  249. * \details
  250. * There are 18 Signed 16-bit Multiply 32-bit Add/Subtract Instructions.
  251. */
  252. /**
  253. * \defgroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB Signed 16-bit Multiply 64-bit Add/Subtract Instructions
  254. * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS
  255. * \brief Signed 16-bit Multiply 64-bit Add/Subtract Instructions
  256. * \details
  257. * This group contains the Signed 16-bit Multiply 64-bit Add/Subtract Instructions.
  258. */
  259. /**
  260. * \defgroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC Partial-SIMD Miscellaneous Instructions
  261. * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS
  262. * \brief Partial-SIMD Miscellaneous Instructions
  263. * \details
  264. * There are 7 Partial-SIMD Miscellaneous Instructions.
  265. */
  266. /**
  267. * \defgroup NMSIS_Core_DSP_Intrinsic_8B_MULT_32B_ADD 8-bit Multiply with 32-bit Add Instructions
  268. * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS
  269. * \brief 8-bit Multiply with 32-bit Add Instructions
  270. * \details
  271. * There are 3 8-bit Multiply with 32-bit Add Instructions.
  272. */
  273. /**
  274. * \defgroup NMSIS_Core_DSP_Intrinsic_64B_PROFILE 64-bit Profile Instructions
  275. * \ingroup NMSIS_Core_DSP_Intrinsic
  276. * \brief 64-bit Profile Instructions
  277. * \details
  278. */
  279. /**
  280. * \defgroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB 64-bit Addition & Subtraction Instructions
  281. * \ingroup NMSIS_Core_DSP_Intrinsic_64B_PROFILE
  282. * \brief 64-bit Addition & Subtraction Instructions
  283. * \details
  284. * There are 10 64-bit Addition & Subtraction Instructions.
  285. */
  286. /**
  287. * \defgroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB 32-bit Multiply with 64-bit Add/Subtract Instructions
  288. * \ingroup NMSIS_Core_DSP_Intrinsic_64B_PROFILE
  289. * \brief 32-bit Multiply with 64-bit Add/Subtract Instructions
  290. * \details
  291. * This group contains the 32-bit Multiply with 64-bit Add/Subtract Instructions.
  292. */
  293. /**
  294. * \defgroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB Signed 16-bit Multiply with 64-bit Add/Subtract Instructions
  295. * \ingroup NMSIS_Core_DSP_Intrinsic_64B_PROFILE
  296. * \brief Signed 16-bit Multiply with 64-bit Add/Subtract Instructions
  297. * \details
  298. * There are 10 Signed 16-bit Multiply with 64-bit Add/Subtract Instructions.
  299. */
  300. /**
  301. * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY RV64 Only Instructions
  302. * \ingroup NMSIS_Core_DSP_Intrinsic
  303. * \brief RV64 Only Instructions
  304. * \details
  305. */
  306. /**
  307. * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB (RV64 Only) SIMD 32-bit Add/Subtract Instructions
  308. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY
  309. * \brief (RV64 Only) SIMD 32-bit Add/Subtract Instructions
  310. * \details
  311. * The following tables list instructions that are only present in RV64.
  312. * There are 30 SIMD 32-bit addition or subtraction instructions. There are 4 SIMD 16-bit Packing Instructions.
  313. */
  314. /**
  315. * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT (RV64 Only) SIMD 32-bit Shift Instructions
  316. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY
  317. * \brief (RV64 Only) SIMD 32-bit Shift Instructions
  318. * \details
  319. * there are 14 (RV64 Only) SIMD 32-bit Shift Instructions
  320. */
  321. /**
  322. * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC (RV64 Only) SIMD 32-bit Miscellaneous Instructions
  323. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY
  324. * \brief (RV64 Only) SIMD 32-bit Miscellaneous Instructions
  325. * \details
  326. * there are 5 (RV64 Only) SIMD 32-bit Miscellaneous Instructions
  327. */
  328. /**
  329. * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT (RV64 Only) SIMD Q15 Saturating Multiply Instructions
  330. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY
  331. * \brief (RV64 Only) SIMD Q15 Saturating Multiply Instructions
  332. * \details
  333. * there are 9 (RV64 Only) SIMD Q15 saturating Multiply Instructions
  334. */
  335. /**
  336. * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT (RV64 Only) 32-bit Multiply Instructions
  337. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY
  338. * \brief (RV64 Only) 32-bit Multiply Instructions
  339. * \details
  340. * there are 3 (RV64 Only) 32-bit Multiply Instructions
  341. */
  342. /**
  343. * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT_ADD (RV64 Only) 32-bit Multiply & Add Instructions
  344. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY
  345. * \brief (RV64 Only) 32-bit Multiply & Add Instructions
  346. * \details
  347. * there are 3 (RV64 Only) 32-bit Multiply & Add Instructions
  348. */
  349. /**
  350. * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC (RV64 Only) 32-bit Parallel Multiply & Add Instructions
  351. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY
  352. * \brief (RV64 Only) 32-bit Parallel Multiply & Add Instructions
  353. * \details
  354. * there are 12 (RV64 Only) 32-bit Parallel Multiply & Add Instructions
  355. */
  356. /**
  357. * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_NON_SIMD_32B_SHIFT (RV64 Only) Non-SIMD 32-bit Shift Instructions
  358. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY
  359. * \brief (RV64 Only) Non-SIMD 32-bit Shift Instructions
  360. * \details
  361. * there is 1 (RV64 Only) Non-SIMD 32-bit Shift Instruction
  362. */
  363. /**
  364. * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PACK 32-bit Packing Instructions
  365. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY
  366. * \brief 32-bit Packing Instructions
  367. * \details
  368. * There are four 32-bit packing instructions here
  369. */
  370. /* ===== Inline Function Start for 3.1. ADD8 ===== */
  371. /**
  372. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
  373. * \brief ADD8 (SIMD 8-bit Addition)
  374. * \details
  375. * **Type**: SIMD
  376. *
  377. * **Syntax**:\n
  378. * ~~~
  379. * ADD8 Rd, Rs1, Rs2
  380. * ~~~
  381. *
  382. * **Purpose**:\n
  383. * Do 8-bit integer element additions simultaneously.
  384. *
  385. * **Description**:\n
  386. * This instruction adds the 8-bit integer elements in Rs1 with the 8-bit integer elements
  387. * in Rs2, and then writes the 8-bit element results to Rd.
  388. *
  389. * **Note**:\n
  390. * This instruction can be used for either signed or unsigned addition.
  391. *
  392. * **Operations**:\n
  393. * ~~~
  394. * Rd.B[x] = Rs1.B[x] + Rs2.B[x];
  395. * for RV32: x=3...0,
  396. * for RV64: x=7...0
  397. * ~~~
  398. *
  399. * \param [in] a unsigned long type of value stored in a
  400. * \param [in] b unsigned long type of value stored in b
  401. * \return value stored in unsigned long type
  402. */
  403. __STATIC_FORCEINLINE unsigned long __RV_ADD8(unsigned long a, unsigned long b)
  404. {
  405. unsigned long result;
  406. __ASM volatile("add8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  407. return result;
  408. }
  409. /* ===== Inline Function End for 3.1. ADD8 ===== */
  410. /* ===== Inline Function Start for 3.2. ADD16 ===== */
  411. /**
  412. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  413. * \brief ADD16 (SIMD 16-bit Addition)
  414. * \details
  415. * **Type**: SIMD
  416. *
  417. * **Syntax**:\n
  418. * ~~~
  419. * ADD16 Rd, Rs1, Rs2
  420. * ~~~
  421. *
  422. * **Purpose**:\n
  423. * Do 16-bit integer element additions simultaneously.
  424. *
  425. * **Description**:\n
  426. * This instruction adds the 16-bit integer elements in Rs1 with the 16-bit integer
  427. * elements in Rs2, and then writes the 16-bit element results to Rd.
  428. *
  429. * **Note**:\n
  430. * This instruction can be used for either signed or unsigned addition.
  431. *
  432. * **Operations**:\n
  433. * ~~~
  434. * Rd.H[x] = Rs1.H[x] + Rs2.H[x];
  435. * for RV32: x=1...0,
  436. * for RV64: x=3...0
  437. * ~~~
  438. *
  439. * \param [in] a unsigned long type of value stored in a
  440. * \param [in] b unsigned long type of value stored in b
  441. * \return value stored in unsigned long type
  442. */
  443. __STATIC_FORCEINLINE unsigned long __RV_ADD16(unsigned long a, unsigned long b)
  444. {
  445. unsigned long result;
  446. __ASM volatile("add16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  447. return result;
  448. }
  449. /* ===== Inline Function End for 3.2. ADD16 ===== */
  450. /* ===== Inline Function Start for 3.3. ADD64 ===== */
  451. /**
  452. * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
  453. * \brief ADD64 (64-bit Addition)
  454. * \details
  455. * **Type**: 64-bit Profile
  456. *
  457. * **Syntax**:\n
  458. * ~~~
  459. * ADD64 Rd, Rs1, Rs2
  460. * ~~~
  461. *
  462. * **Purpose**:\n
  463. * Add two 64-bit signed or unsigned integers.
  464. *
  465. * **RV32 Description**:\n
  466. * This instruction adds the 64-bit integer of an even/odd pair of registers specified
  467. * by Rs1(4,1) with the 64-bit integer of an even/odd pair of registers specified by Rs2(4,1), and then
  468. * writes the 64-bit result to an even/odd pair of registers specified by Rd(4,1).
  469. * Rx(4,1), i.e., value d, determines the even/odd pair group of two registers. Specifically, the register
  470. * pair includes register 2d and 2d+1.
  471. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
  472. * of the pair contains the low 32-bit of the result.
  473. *
  474. * **RV64 Description**:\n
  475. * This instruction has the same behavior as the ADD instruction in RV64I.
  476. *
  477. * **Note**:\n
  478. * This instruction can be used for either signed or unsigned addition.
  479. *
  480. * **Operations**:\n
  481. * ~~~
  482. * RV32:
  483. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  484. * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
  485. * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
  486. * R[t_H].R[t_L] = R[a_H].R[a_L] + R[b_H].R[b_L];
  487. * RV64:
  488. * Rd = Rs1 + Rs2;
  489. * ~~~
  490. *
  491. * \param [in] a unsigned long long type of value stored in a
  492. * \param [in] b unsigned long long type of value stored in b
  493. * \return value stored in unsigned long long type
  494. */
  495. __STATIC_FORCEINLINE unsigned long long __RV_ADD64(unsigned long long a, unsigned long long b)
  496. {
  497. unsigned long long result;
  498. __ASM volatile("add64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  499. return result;
  500. }
  501. /* ===== Inline Function End for 3.3. ADD64 ===== */
  502. /* ===== Inline Function Start for 3.4. AVE ===== */
  503. /**
  504. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
  505. * \brief AVE (Average with Rounding)
  506. * \details
  507. * **Type**: DSP
  508. *
  509. * **Syntax**:\n
  510. * ~~~
  511. * AVE Rd, Rs1, Rs2
  512. * ~~~
  513. *
  514. * **Purpose**:\n
  515. * Calculate the average of the contents of two general registers.
  516. *
  517. * **Description**:\n
  518. * This instruction calculates the average value of two signed integers stored in Rs1 and
  519. * Rs2, rounds up a half-integer result to the nearest integer, and writes the result to Rd.
  520. *
  521. * **Operations**:\n
  522. * ~~~
  523. * Sum = CONCAT(Rs1[MSB],Rs1[MSB:0]) + CONCAT(Rs2[MSB],Rs2[MSB:0]) + 1;
  524. * Rd = Sum[(MSB+1):1];
  525. * for RV32: MSB=31,
  526. * for RV64: MSB=63
  527. * ~~~
  528. *
  529. * \param [in] a long type of value stored in a
  530. * \param [in] b long type of value stored in b
  531. * \return value stored in long type
  532. */
  533. __STATIC_FORCEINLINE long __RV_AVE(long a, long b)
  534. {
  535. long result;
  536. __ASM volatile("ave %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  537. return result;
  538. }
  539. /* ===== Inline Function End for 3.4. AVE ===== */
  540. /* ===== Inline Function Start for 3.5. BITREV ===== */
  541. /**
  542. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
  543. * \brief BITREV (Bit Reverse)
  544. * \details
  545. * **Type**: DSP
  546. *
  547. * **Syntax**:\n
  548. * ~~~
  549. * BITREV Rd, Rs1, Rs2
  550. * ~~~
  551. *
  552. * **Purpose**:\n
  553. * Reverse the bit positions of the source operand within a specified width starting from bit
  554. * 0. The reversed width is a variable from a GPR.
  555. *
  556. * **Description**:\n
  557. * This instruction reverses the bit positions of the content of Rs1. The reversed bit width
  558. * is calculated as Rs2[4:0]+1 (RV32) or Rs2[5:0]+1 (RV64). The upper bits beyond the reversed width
  559. * are filled with zeros. After the bit reverse operation, the result is written to Rd.
  560. *
  561. * **Operations**:\n
  562. * ~~~
  563. * msb = Rs2[4:0]; (for RV32)
  564. * msb = Rs2[5:0]; (for RV64)
  565. * rev[0:msb] = Rs1[msb:0];
  566. * Rd = ZE(rev[msb:0]);
  567. * ~~~
  568. *
  569. * \param [in] a unsigned long type of value stored in a
  570. * \param [in] b unsigned long type of value stored in b
  571. * \return value stored in unsigned long type
  572. */
  573. __STATIC_FORCEINLINE unsigned long __RV_BITREV(unsigned long a, unsigned long b)
  574. {
  575. unsigned long result;
  576. __ASM volatile("bitrev %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  577. return result;
  578. }
  579. /* ===== Inline Function End for 3.5. BITREV ===== */
  580. /* ===== Inline Function Start for 3.6. BITREVI ===== */
  581. /**
  582. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
  583. * \brief BITREVI (Bit Reverse Immediate)
  584. * \details
  585. * **Type**: DSP
  586. *
  587. * **Syntax**:\n
  588. * ~~~
  589. * (RV32) BITREVI Rd, Rs1, imm[4:0]
  590. * (RV64) BITREVI Rd, Rs1, imm[5:0]
  591. * ~~~
  592. *
  593. * **Purpose**:\n
  594. * Reverse the bit positions of the source operand within a specified width starting from bit
  595. * 0. The reversed width is an immediate value.
  596. *
  597. * **Description**:\n
  598. * This instruction reverses the bit positions of the content of Rs1. The reversed bit width
  599. * is calculated as imm[4:0]+1 (RV32) or imm[5:0]+1 (RV64). The upper bits beyond the reversed width
  600. * are filled with zeros. After the bit reverse operation, the result is written to Rd.
  601. *
  602. * **Operations**:\n
  603. * ~~~
  604. * msb = imm[4:0]; (RV32)
  605. * msb = imm[5:0]; (RV64)
  606. * rev[0:msb] = Rs1[msb:0];
  607. * Rd = ZE(rev[msb:0]);
  608. * ~~~
  609. *
  610. * \param [in] a unsigned long type of value stored in a
  611. * \param [in] b immediate (compile-time constant) giving the reversed bit width minus one, i.e. imm[4:0] on RV32 or imm[5:0] on RV64
  612. * \return value stored in unsigned long type
  613. */
  #define __RV_BITREVI(a, b) \
      ({ \
          /* Evaluate `a` exactly once and BEFORE any other local of this  */ \
          /* statement expression exists; the locals use macro-specific    */ \
          /* names so an argument such as `result` or `__a` still refers   */ \
          /* to the caller's variable instead of an uninitialised local.   */ \
          unsigned long __rv_bitrevi_src = (unsigned long)(a); \
          unsigned long __rv_bitrevi_dst; \
          /* `b` is passed with the "K" immediate constraint, so it must   */ \
          /* be a compile-time constant.                                   */ \
          /* NOTE(review): "K" is presumably GCC's 5-bit unsigned RISC-V   */ \
          /* immediate, which would reject 32..63 on RV64 - confirm.       */ \
          __ASM volatile("bitrevi %0, %1, %2" \
                         : "=r"(__rv_bitrevi_dst) \
                         : "r"(__rv_bitrevi_src), "K"(b)); \
          /* Value of the whole statement expression. */ \
          __rv_bitrevi_dst; \
      })
  621. /* ===== Inline Function End for 3.6. BITREVI ===== */
  622. /* ===== Inline Function Start for 3.7. BPICK ===== */
  623. /**
  624. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
  625. * \brief BPICK (Bit-wise Pick)
  626. * \details
  627. * **Type**: DSP
  628. *
  629. * **Syntax**:\n
  630. * ~~~
  631. * BPICK Rd, Rs1, Rs2, Rc
  632. * ~~~
  633. *
  634. * **Purpose**:\n
  635. * Select from two source operands based on a bit mask in the third operand.
  636. *
  637. * **Description**:\n
  638. * This instruction selects individual bits from Rs1 or Rs2, based on the bit mask value in
  639. * Rc. If a bit in Rc is 1, the corresponding bit is from Rs1; otherwise, the corresponding bit is from Rs2.
  640. * The selection results are written to Rd.
  641. *
  642. * **Operations**:\n
  643. * ~~~
  644. * Rd[x] = Rc[x]? Rs1[x] : Rs2[x];
  645. * for RV32, x=31...0
  646. * for RV64, x=63...0
  647. * ~~~
  648. *
  649. * \param [in] a unsigned long type of value stored in a
  650. * \param [in] b unsigned long type of value stored in b
  651. * \param [in] c unsigned long type of value stored in c
  652. * \return value stored in unsigned long type
  653. */
  654. __STATIC_FORCEINLINE unsigned long __RV_BPICK(unsigned long a, unsigned long b, unsigned long c)
  655. {
  656. unsigned long result;
  657. __ASM volatile("bpick %0, %1, %2, %3" : "=r"(result) : "r"(a), "r"(b), "r"(c));
  658. return result;
  659. }
  660. /* ===== Inline Function End for 3.7. BPICK ===== */
  661. /* ===== Inline Function Start for 3.8. CLROV ===== */
  662. /**
  663. * \ingroup NMSIS_Core_DSP_Intrinsic_OV_FLAG_SC
  664. * \brief CLROV (Clear OV flag)
  665. * \details
  666. * **Type**: DSP
  667. *
  668. * **Syntax**:\n
  669. * ~~~
  670. * CLROV # pseudo mnemonic
  671. * ~~~
  672. *
  673. * **Purpose**:\n
  674. * This pseudo instruction is an alias to `CSRRCI x0, ucode, 1` instruction.
  675. *
  676. *
  677. */
  678. __STATIC_FORCEINLINE void __RV_CLROV(void)
  679. {
  680. __ASM volatile("clrov ");
  681. }
  682. /* ===== Inline Function End for 3.8. CLROV ===== */
  683. /* ===== Inline Function Start for 3.9. CLRS8 ===== */
  684. /**
  685. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
  686. * \brief CLRS8 (SIMD 8-bit Count Leading Redundant Sign)
  687. * \details
  688. * **Type**: SIMD
  689. *
  690. * **Syntax**:\n
  691. * ~~~
  692. * CLRS8 Rd, Rs1
  693. * ~~~
  694. *
  695. * **Purpose**:\n
  696. * Count the number of redundant sign bits of the 8-bit elements of a general register.
  697. *
  698. * **Description**:\n
  699. * Starting from the bits next to the sign bits of the 8-bit elements of Rs1, this instruction
  700. * counts the number of redundant sign bits and writes the result to the corresponding 8-bit elements
  701. * of Rd.
  702. *
  703. * **Operations**:\n
  704. * ~~~
  705. * snum[x] = Rs1.B[x];
  706. * cnt[x] = 0;
  707. * for (i = 6 to 0) {
  708. * if (snum[x](i) == snum[x](7)) {
  709. * cnt[x] = cnt[x] + 1;
  710. * } else {
  711. * break;
  712. * }
  713. * }
  714. * Rd.B[x] = cnt[x];
  715. * for RV32: x=3...0
  716. * for RV64: x=7...0
  717. * ~~~
  718. *
  719. * \param [in] a unsigned long type of value stored in a
  720. * \return value stored in unsigned long type
  721. */
  722. __STATIC_FORCEINLINE unsigned long __RV_CLRS8(unsigned long a)
  723. {
  724. unsigned long result;
  725. __ASM volatile("clrs8 %0, %1" : "=r"(result) : "r"(a));
  726. return result;
  727. }
  728. /* ===== Inline Function End for 3.9. CLRS8 ===== */
  729. /* ===== Inline Function Start for 3.10. CLRS16 ===== */
  730. /**
  731. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
  732. * \brief CLRS16 (SIMD 16-bit Count Leading Redundant Sign)
  733. * \details
  734. * **Type**: SIMD
  735. *
  736. * **Syntax**:\n
  737. * ~~~
  738. * CLRS16 Rd, Rs1
  739. * ~~~
  740. *
  741. * **Purpose**:\n
  742. * Count the number of redundant sign bits of the 16-bit elements of a general register.
  743. *
  744. * **Description**:\n
  745. * Starting from the bits next to the sign bits of the 16-bit elements of Rs1, this
  746. * instruction counts the number of redundant sign bits and writes the result to the corresponding 16-
  747. * bit elements of Rd.
  748. *
  749. * **Operations**:\n
  750. * ~~~
  751. * snum[x] = Rs1.H[x];
  752. * cnt[x] = 0;
  753. * for (i = 14 to 0) {
  754. * if (snum[x](i) == snum[x](15)) {
  755. * cnt[x] = cnt[x] + 1;
  756. * } else {
  757. * break;
  758. * }
  759. * }
  760. * Rd.H[x] = cnt[x];
  761. * for RV32: x=1...0
  762. * for RV64: x=3...0
  763. * ~~~
  764. *
  765. * \param [in] a unsigned long type of value stored in a
  766. * \return value stored in unsigned long type
  767. */
  768. __STATIC_FORCEINLINE unsigned long __RV_CLRS16(unsigned long a)
  769. {
  770. unsigned long result;
  771. __ASM volatile("clrs16 %0, %1" : "=r"(result) : "r"(a));
  772. return result;
  773. }
  774. /* ===== Inline Function End for 3.10. CLRS16 ===== */
  775. /* ===== Inline Function Start for 3.11. CLRS32 ===== */
  776. /**
  777. * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
  778. * \brief CLRS32 (SIMD 32-bit Count Leading Redundant Sign)
  779. * \details
  780. * **Type**: SIMD
  781. *
  782. * **Syntax**:\n
  783. * ~~~
  784. * CLRS32 Rd, Rs1
  785. * ~~~
  786. *
  787. * **Purpose**:\n
  788. * Count the number of redundant sign bits of the 32-bit elements of a general register.
  789. *
  790. * **Description**:\n
  791. * Starting from the bits next to the sign bits of the 32-bit elements of Rs1, this
  792. * instruction counts the number of redundant sign bits and writes the result to the corresponding 32-
  793. * bit elements of Rd.
  794. *
  795. * **Operations**:\n
  796. * ~~~
  797. * snum[x] = Rs1.W[x];
  798. * cnt[x] = 0;
  799. * for (i = 30 to 0) {
  800. * if (snum[x](i) == snum[x](31)) {
  801. * cnt[x] = cnt[x] + 1;
  802. * } else {
  803. * break;
  804. * }
  805. * }
  806. * Rd.W[x] = cnt[x];
  807. * for RV32: x=0
  808. * for RV64: x=1...0
  809. * ~~~
  810. *
  811. * \param [in] a unsigned long type of value stored in a
  812. * \return value stored in unsigned long type
  813. */
  814. __STATIC_FORCEINLINE unsigned long __RV_CLRS32(unsigned long a)
  815. {
  816. unsigned long result;
  817. __ASM volatile("clrs32 %0, %1" : "=r"(result) : "r"(a));
  818. return result;
  819. }
  820. /* ===== Inline Function End for 3.11. CLRS32 ===== */
  821. /* ===== Inline Function Start for 3.12. CLO8 ===== */
  822. /**
  823. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
  824. * \brief CLO8 (SIMD 8-bit Count Leading One)
  825. * \details
  826. * **Type**: SIMD
  827. *
  828. * **Syntax**:\n
  829. * ~~~
  830. * CLO8 Rd, Rs1
  831. * ~~~
  832. *
  833. * **Purpose**:\n
  834. * Count the number of leading one bits of the 8-bit elements of a general register.
  835. *
  836. * **Description**:\n
  837. * Starting from the most significant bits of the 8-bit elements of Rs1, this instruction
  838. * counts the number of leading one bits and writes the results to the corresponding 8-bit elements of
  839. * Rd.
  840. *
  841. * **Operations**:\n
  842. * ~~~
  843. * snum[x] = Rs1.B[x];
  844. * cnt[x] = 0;
  845. * for (i = 7 to 0) {
  846. * if (snum[x](i) == 1) {
  847. * cnt[x] = cnt[x] + 1;
  848. * } else {
  849. * break;
  850. * }
  851. * }
  852. * Rd.B[x] = cnt[x];
  853. * for RV32: x=3...0
  854. * for RV64: x=7...0
  855. * ~~~
  856. *
  857. * \param [in] a unsigned long type of value stored in a
  858. * \return value stored in unsigned long type
  859. */
  860. __STATIC_FORCEINLINE unsigned long __RV_CLO8(unsigned long a)
  861. {
  862. unsigned long result;
  863. __ASM volatile("clo8 %0, %1" : "=r"(result) : "r"(a));
  864. return result;
  865. }
  866. /* ===== Inline Function End for 3.12. CLO8 ===== */
  867. /* ===== Inline Function Start for 3.13. CLO16 ===== */
  868. /**
  869. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
  870. * \brief CLO16 (SIMD 16-bit Count Leading One)
  871. * \details
  872. * **Type**: SIMD
  873. *
  874. * **Syntax**:\n
  875. * ~~~
  876. * CLO16 Rd, Rs1
  877. * ~~~
  878. *
  879. * **Purpose**:\n
  880. * Count the number of leading one bits of the 16-bit elements of a general register.
  881. *
  882. * **Description**:\n
  883. * Starting from the most significant bits of the 16-bit elements of Rs1, this instruction
  884. * counts the number of leading one bits and writes the results to the corresponding 16-bit elements
  885. * of Rd.
  886. *
  887. * **Operations**:\n
  888. * ~~~
  889. * snum[x] = Rs1.H[x];
  890. * cnt[x] = 0;
  891. * for (i = 15 to 0) {
  892. * if (snum[x](i) == 1) {
  893. * cnt[x] = cnt[x] + 1;
  894. * } else {
  895. * break;
  896. * }
  897. * }
  898. * Rd.H[x] = cnt[x];
  899. * for RV32: x=1...0
  900. * for RV64: x=3...0
  901. * ~~~
  902. *
  903. * \param [in] a unsigned long type of value stored in a
  904. * \return value stored in unsigned long type
  905. */
  906. __STATIC_FORCEINLINE unsigned long __RV_CLO16(unsigned long a)
  907. {
  908. unsigned long result;
  909. __ASM volatile("clo16 %0, %1" : "=r"(result) : "r"(a));
  910. return result;
  911. }
  912. /* ===== Inline Function End for 3.13. CLO16 ===== */
  913. /* ===== Inline Function Start for 3.14. CLO32 ===== */
  914. /**
  915. * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
  916. * \brief CLO32 (SIMD 32-bit Count Leading One)
  917. * \details
  918. * **Type**: SIMD
  919. *
  920. * **Syntax**:\n
  921. * ~~~
  922. * CLO32 Rd, Rs1
  923. * ~~~
  924. *
  925. * **Purpose**:\n
  926. * Count the number of leading one bits of the 32-bit elements of a general register.
  927. *
  928. * **Description**:\n
  929. * Starting from the most significant bits of the 32-bit elements of Rs1, this instruction
  930. * counts the number of leading one bits and writes the results to the corresponding 32-bit elements
  931. * of Rd.
  932. *
  933. * **Operations**:\n
  934. * ~~~
  935. * snum[x] = Rs1.W[x];
  936. * cnt[x] = 0;
  937. * for (i = 31 to 0) {
  938. * if (snum[x](i) == 1) {
  939. * cnt[x] = cnt[x] + 1;
  940. * } else {
  941. * break;
  942. * }
  943. * }
  944. * Rd.W[x] = cnt[x];
  945. * for RV32: x=0
  946. * for RV64: x=1...0
  947. * ~~~
  948. *
  949. * \param [in] a unsigned long type of value stored in a
  950. * \return value stored in unsigned long type
  951. */
  952. __STATIC_FORCEINLINE unsigned long __RV_CLO32(unsigned long a)
  953. {
  954. unsigned long result;
  955. __ASM volatile("clo32 %0, %1" : "=r"(result) : "r"(a));
  956. return result;
  957. }
  958. /* ===== Inline Function End for 3.14. CLO32 ===== */
  959. /* ===== Inline Function Start for 3.15. CLZ8 ===== */
  960. /**
  961. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
  962. * \brief CLZ8 (SIMD 8-bit Count Leading Zero)
  963. * \details
  964. * **Type**: SIMD
  965. *
  966. * **Syntax**:\n
  967. * ~~~
  968. * CLZ8 Rd, Rs1
  969. * ~~~
  970. *
  971. * **Purpose**:\n
  972. * Count the number of leading zero bits of the 8-bit elements of a general register.
  973. *
  974. * **Description**:\n
  975. * Starting from the most significant bits of the 8-bit elements of Rs1, this instruction
  976. * counts the number of leading zero bits and writes the results to the corresponding 8-bit elements of
  977. * Rd.
  978. *
  979. * **Operations**:\n
  980. * ~~~
  981. * snum[x] = Rs1.B[x];
  982. * cnt[x] = 0;
  983. * for (i = 7 to 0) {
  984. * if (snum[x](i) == 0) {
  985. * cnt[x] = cnt[x] + 1;
  986. * } else {
  987. * break;
  988. * }
  989. * }
  990. * Rd.B[x] = cnt[x];
  991. * for RV32: x=3...0
  992. * for RV64: x=7...0
  993. * ~~~
  994. *
  995. * \param [in] a unsigned long type of value stored in a
  996. * \return value stored in unsigned long type
  997. */
  998. __STATIC_FORCEINLINE unsigned long __RV_CLZ8(unsigned long a)
  999. {
  1000. unsigned long result;
  1001. __ASM volatile("clz8 %0, %1" : "=r"(result) : "r"(a));
  1002. return result;
  1003. }
  1004. /* ===== Inline Function End for 3.15. CLZ8 ===== */
  1005. /* ===== Inline Function Start for 3.16. CLZ16 ===== */
  1006. /**
  1007. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
  1008. * \brief CLZ16 (SIMD 16-bit Count Leading Zero)
  1009. * \details
  1010. * **Type**: SIMD
  1011. *
  1012. * **Syntax**:\n
  1013. * ~~~
  1014. * CLZ16 Rd, Rs1
  1015. * ~~~
  1016. *
  1017. * **Purpose**:\n
  1018. * Count the number of leading zero bits of the 16-bit elements of a general register.
  1019. *
  1020. * **Description**:\n
  1021. * Starting from the most significant bits of the 16-bit elements of Rs1, this instruction
  1022. * counts the number of leading zero bits and writes the results to the corresponding 16-bit elements
  1023. * of Rd.
  1024. *
  1025. * **Operations**:\n
  1026. * ~~~
  1027. * snum[x] = Rs1.H[x];
  1028. * cnt[x] = 0;
  1029. * for (i = 15 to 0) {
  1030. * if (snum[x](i) == 0) {
  1031. * cnt[x] = cnt[x] + 1;
  1032. * } else {
  1033. * break;
  1034. * }
  1035. * }
  1036. * Rd.H[x] = cnt[x];
  1037. * for RV32: x=1...0
  1038. * for RV64: x=3...0
  1039. * ~~~
  1040. *
  1041. * \param [in] a unsigned long type of value stored in a
  1042. * \return value stored in unsigned long type
  1043. */
  1044. __STATIC_FORCEINLINE unsigned long __RV_CLZ16(unsigned long a)
  1045. {
  1046. unsigned long result;
  1047. __ASM volatile("clz16 %0, %1" : "=r"(result) : "r"(a));
  1048. return result;
  1049. }
  1050. /* ===== Inline Function End for 3.16. CLZ16 ===== */
  1051. /* ===== Inline Function Start for 3.17. CLZ32 ===== */
  1052. /**
  1053. * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
  1054. * \brief CLZ32 (SIMD 32-bit Count Leading Zero)
  1055. * \details
  1056. * **Type**: SIMD
  1057. *
  1058. * **Syntax**:\n
  1059. * ~~~
  1060. * CLZ32 Rd, Rs1
  1061. * ~~~
  1062. *
  1063. * **Purpose**:\n
  1064. * Count the number of leading zero bits of the 32-bit elements of a general register.
  1065. *
  1066. * **Description**:\n
  1067. * Starting from the most significant bits of the 32-bit elements of Rs1, this instruction
  1068. * counts the number of leading zero bits and writes the results to the corresponding 32-bit elements
  1069. * of Rd.
  1070. *
  1071. * **Operations**:\n
  1072. * ~~~
  1073. * snum[x] = Rs1.W[x];
  1074. * cnt[x] = 0;
  1075. * for (i = 31 to 0) {
  1076. * if (snum[x](i) == 0) {
  1077. * cnt[x] = cnt[x] + 1;
  1078. * } else {
  1079. * break;
  1080. * }
  1081. * }
  1082. * Rd.W[x] = cnt[x];
  1083. * for RV32: x=0
  1084. * for RV64: x=1...0
  1085. * ~~~
  1086. *
  1087. * \param [in] a unsigned long type of value stored in a
  1088. * \return value stored in unsigned long type
  1089. */
  1090. __STATIC_FORCEINLINE unsigned long __RV_CLZ32(unsigned long a)
  1091. {
  1092. unsigned long result;
  1093. __ASM volatile("clz32 %0, %1" : "=r"(result) : "r"(a));
  1094. return result;
  1095. }
  1096. /* ===== Inline Function End for 3.17. CLZ32 ===== */
  1097. /* ===== Inline Function Start for 3.18. CMPEQ8 ===== */
  1098. /**
  1099. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP
  1100. * \brief CMPEQ8 (SIMD 8-bit Integer Compare Equal)
  1101. * \details
  1102. * **Type**: SIMD
  1103. *
  1104. * **Syntax**:\n
  1105. * ~~~
  1106. * CMPEQ8 Rd, Rs1, Rs2
  1107. * ~~~
  1108. *
  1109. * **Purpose**:\n
  1110. * Do 8-bit integer elements equal comparisons simultaneously.
  1111. *
  1112. * **Description**:\n
  1113. * This instruction compares the 8-bit integer elements in Rs1 with the 8-bit integer
  1114. * elements in Rs2 to see if they are equal. If they are equal, the result is 0xFF; otherwise, the result is
  1115. * 0x0. The 8-bit element comparison results are written to Rd.
  1116. *
  1117. * **Note**:\n
  1118. * This instruction can be used for either signed or unsigned numbers.
  1119. *
  1120. * **Operations**:\n
  1121. * ~~~
  1122. * Rd.B[x] = (Rs1.B[x] == Rs2.B[x])? 0xff : 0x0;
  1123. * for RV32: x=3...0,
  1124. * for RV64: x=7...0
  1125. * ~~~
  1126. *
  1127. * \param [in] a unsigned long type of value stored in a
  1128. * \param [in] b unsigned long type of value stored in b
  1129. * \return value stored in unsigned long type
  1130. */
  1131. __STATIC_FORCEINLINE unsigned long __RV_CMPEQ8(unsigned long a, unsigned long b)
  1132. {
  1133. unsigned long result;
  1134. __ASM volatile("cmpeq8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  1135. return result;
  1136. }
  1137. /* ===== Inline Function End for 3.18. CMPEQ8 ===== */
  1138. /* ===== Inline Function Start for 3.19. CMPEQ16 ===== */
  1139. /**
  1140. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP
  1141. * \brief CMPEQ16 (SIMD 16-bit Integer Compare Equal)
  1142. * \details
  1143. * **Type**: SIMD
  1144. *
  1145. * **Syntax**:\n
  1146. * ~~~
  1147. * CMPEQ16 Rd, Rs1, Rs2
  1148. * ~~~
  1149. *
  1150. * **Purpose**:\n
  1151. * Do 16-bit integer elements equal comparisons simultaneously.
  1152. *
  1153. * **Description**:\n
  1154. * This instruction compares the 16-bit integer elements in Rs1 with the 16-bit integer
  1155. * elements in Rs2 to see if they are equal. If they are equal, the result is 0xFFFF; otherwise, the result
  1156. * is 0x0. The 16-bit element comparison results are written to Rd.
  1157. *
  1158. * **Note**:\n
  1159. * This instruction can be used for either signed or unsigned numbers.
  1160. *
  1161. * **Operations**:\n
  1162. * ~~~
  1163. * Rd.H[x] = (Rs1.H[x] == Rs2.H[x])? 0xffff : 0x0;
  1164. * for RV32: x=1...0,
  1165. * for RV64: x=3...0
  1166. * ~~~
  1167. *
  1168. * \param [in] a unsigned long type of value stored in a
  1169. * \param [in] b unsigned long type of value stored in b
  1170. * \return value stored in unsigned long type
  1171. */
  1172. __STATIC_FORCEINLINE unsigned long __RV_CMPEQ16(unsigned long a, unsigned long b)
  1173. {
  1174. unsigned long result;
  1175. __ASM volatile("cmpeq16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  1176. return result;
  1177. }
  1178. /* ===== Inline Function End for 3.19. CMPEQ16 ===== */
  1179. /* ===== Inline Function Start for 3.20. CRAS16 ===== */
  1180. /**
  1181. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  1182. * \brief CRAS16 (SIMD 16-bit Cross Addition & Subtraction)
  1183. * \details
  1184. * **Type**: SIMD
  1185. *
  1186. * **Syntax**:\n
  1187. * ~~~
  1188. * CRAS16 Rd, Rs1, Rs2
  1189. * ~~~
  1190. *
  1191. * **Purpose**:\n
  1192. * Do 16-bit integer element addition and 16-bit integer element subtraction in a 32-bit
  1193. * chunk simultaneously. Operands are from crossed positions in 32-bit chunks.
  1194. *
  1195. * **Description**:\n
  1196. * This instruction adds the 16-bit integer element in [31:16] of 32-bit chunks in Rs1 with
  1197. * the 16-bit integer element in [15:0] of 32-bit chunks in Rs2, and writes the result to [31:16] of 32-bit
  1198. * chunks in Rd; at the same time, it subtracts the 16-bit integer element in [31:16] of 32-bit chunks in
  1199. * Rs2 from the 16-bit integer element in [15:0] of 32-bit chunks in Rs1, and writes the result to [15:0] of 32-
  1200. * bit chunks in Rd.
  1201. *
  1202. * **Note**:\n
  1203. * This instruction can be used for either signed or unsigned operations.
  1204. *
  1205. * **Operations**:\n
  1206. * ~~~
  1207. * Rd.W[x][31:16] = Rs1.W[x][31:16] + Rs2.W[x][15:0];
  1208. * Rd.W[x][15:0] = Rs1.W[x][15:0] - Rs2.W[x][31:16];
  1209. * for RV32, x=0
  1210. * for RV64, x=1...0
  1211. * ~~~
  1212. *
  1213. * \param [in] a unsigned long type of value stored in a
  1214. * \param [in] b unsigned long type of value stored in b
  1215. * \return value stored in unsigned long type
  1216. */
  1217. __STATIC_FORCEINLINE unsigned long __RV_CRAS16(unsigned long a, unsigned long b)
  1218. {
  1219. unsigned long result;
  1220. __ASM volatile("cras16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  1221. return result;
  1222. }
  1223. /* ===== Inline Function End for 3.20. CRAS16 ===== */
  1224. /* ===== Inline Function Start for 3.21. CRSA16 ===== */
  1225. /**
  1226. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  1227. * \brief CRSA16 (SIMD 16-bit Cross Subtraction & Addition)
  1228. * \details
  1229. * **Type**: SIMD
  1230. *
  1231. * **Syntax**:\n
  1232. * ~~~
  1233. * CRSA16 Rd, Rs1, Rs2
  1234. * ~~~
  1235. *
  1236. * **Purpose**:\n
  1237. * Do 16-bit integer element subtraction and 16-bit integer element addition in a 32-bit
  1238. * chunk simultaneously. Operands are from crossed positions in 32-bit chunks.
  1239. *
  1240. * **Description**:\n
  1241. * This instruction subtracts the 16-bit integer element in [15:0] of 32-bit chunks in Rs2
  1242. * from the 16-bit integer element in [31:16] of 32-bit chunks in Rs1, and writes the result to [31:16] of
  1243. * 32-bit chunks in Rd; at the same time, it adds the 16-bit integer element in [31:16] of 32-bit chunks
  1244. * in Rs2 with the 16-bit integer element in [15:0] of 32-bit chunks in Rs1, and writes the result to
  1245. * [15:0] of 32-bit chunks in Rd.
  1246. *
  1247. * **Note**:\n
  1248. * This instruction can be used for either signed or unsigned operations.
  1249. *
  1250. * **Operations**:\n
  1251. * ~~~
  1252. * Rd.W[x][31:16] = Rs1.W[x][31:16] - Rs2.W[x][15:0];
  1253. * Rd.W[x][15:0] = Rs1.W[x][15:0] + Rs2.W[x][31:16];
  1254. * for RV32, x=0
  1255. * for RV64, x=1...0
  1256. * ~~~
  1257. *
  1258. * \param [in] a unsigned long type of value stored in a
  1259. * \param [in] b unsigned long type of value stored in b
  1260. * \return value stored in unsigned long type
  1261. */
  1262. __STATIC_FORCEINLINE unsigned long __RV_CRSA16(unsigned long a, unsigned long b)
  1263. {
  1264. unsigned long result;
  1265. __ASM volatile("crsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  1266. return result;
  1267. }
  1268. /* ===== Inline Function End for 3.21. CRSA16 ===== */
  1269. /* ===== Inline Function Start for 3.22. INSB ===== */
  1270. /**
  1271. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
  1272. * \brief INSB (Insert Byte)
  1273. * \details
  1274. * **Type**: DSP
  1275. *
  1276. * **Syntax**:\n
  1277. * ~~~
  1278. * (RV32) INSB Rd, Rs1, imm[1:0]
  1279. * (RV64) INSB Rd, Rs1, imm[2:0]
  1280. * ~~~
  1281. *
  1282. * **Purpose**:\n
  1283. * Insert byte 0 of a 32-bit or 64-bit register into one of the byte elements of another register.
  1284. *
  1285. * **Description**:\n
  1286. * This instruction inserts byte 0 of Rs1 into byte `imm[1:0]` (RV32) or `imm[2:0]` (RV64)
  1287. * of Rd.
  1288. *
  1289. * **Operations**:\n
  1290. * ~~~
  1291. * bpos = imm[1:0]; (RV32)
  1292. * bpos = imm[2:0]; (RV64)
  1293. * Rd.B[bpos] = Rs1.B[0]
  1294. * ~~~
  1295. *
  * \param [in] t unsigned long value supplying the initial contents of Rd; only the selected byte is replaced
  * \param [in] a unsigned long value whose byte 0 is inserted
  * \param [in] b byte position to overwrite; must be a compile-time constant (imm[1:0] on RV32, imm[2:0] on RV64)
  1299. * \return value stored in unsigned long type
  1300. */
  #define __RV_INSB(t, a, b) \
  ({ \
      /* Rd is read-modify-write ("+r"): it starts as t and only byte b is overwritten. */ \
      unsigned long __insb_rd = (unsigned long)(t); \
      unsigned long __insb_rs1 = (unsigned long)(a); \
      /* b goes through the "K" immediate constraint, so it must be a constant expression. */ \
      __ASM volatile("insb %0, %1, %2" \
                     : "+r"(__insb_rd) \
                     : "r"(__insb_rs1), "K"(b)); \
      __insb_rd; \
  })
  1308. /* ===== Inline Function End for 3.22. INSB ===== */
  1309. /* ===== Inline Function Start for 3.23. KABS8 ===== */
  1310. /**
  1311. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
  1312. * \brief KABS8 (SIMD 8-bit Saturating Absolute)
  1313. * \details
  1314. * **Type**: SIMD
  1315. *
  1316. * **Syntax**:\n
  1317. * ~~~
  1318. * KABS8 Rd, Rs1
  1319. * ~~~
  1320. *
  1321. * **Purpose**:\n
  1322. * Get the absolute value of 8-bit signed integer elements simultaneously.
  1323. *
  1324. * **Description**:\n
  1325. * This instruction calculates the absolute value of 8-bit signed integer elements stored
  1326. * in Rs1 and writes the element results to Rd. If the input number is 0x80, this instruction generates
  1327. * 0x7f as the output and sets the OV bit to 1.
  1328. *
  1329. * **Operations**:\n
  1330. * ~~~
  1331. * src = Rs1.B[x];
  1332. * if (src == 0x80) {
  1333. * src = 0x7f;
  1334. * OV = 1;
  * } else if (src[7] == 1) {
  1336. * src = -src;
  1337. * }
  1338. * Rd.B[x] = src;
  1339. * for RV32: x=3...0,
  1340. * for RV64: x=7...0
  1341. * ~~~
  1342. *
  1343. * \param [in] a unsigned long type of value stored in a
  1344. * \return value stored in unsigned long type
  1345. */
  1346. __STATIC_FORCEINLINE unsigned long __RV_KABS8(unsigned long a)
  1347. {
  1348. unsigned long result;
  1349. __ASM volatile("kabs8 %0, %1" : "=r"(result) : "r"(a));
  1350. return result;
  1351. }
  1352. /* ===== Inline Function End for 3.23. KABS8 ===== */
  1353. /* ===== Inline Function Start for 3.24. KABS16 ===== */
  1354. /**
  1355. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
  1356. * \brief KABS16 (SIMD 16-bit Saturating Absolute)
  1357. * \details
  1358. * **Type**: SIMD
  1359. *
  1360. * **Syntax**:\n
  1361. * ~~~
  1362. * KABS16 Rd, Rs1
  1363. * ~~~
  1364. *
  1365. * **Purpose**:\n
  1366. * Get the absolute value of 16-bit signed integer elements simultaneously.
  1367. *
  1368. * **Description**:\n
  1369. * This instruction calculates the absolute value of 16-bit signed integer elements stored
  1370. * in Rs1 and writes the element results to Rd. If the input number is 0x8000, this instruction
  1371. * generates 0x7fff as the output and sets the OV bit to 1.
  1372. *
  1373. * **Operations**:\n
  1374. * ~~~
  1375. * src = Rs1.H[x];
  1376. * if (src == 0x8000) {
  1377. * src = 0x7fff;
  1378. * OV = 1;
  * } else if (src[15] == 1) {
  1380. * src = -src;
  1381. * }
  1382. * Rd.H[x] = src;
  1383. * for RV32: x=1...0,
  1384. * for RV64: x=3...0
  1385. * ~~~
  1386. *
  1387. * \param [in] a unsigned long type of value stored in a
  1388. * \return value stored in unsigned long type
  1389. */
  1390. __STATIC_FORCEINLINE unsigned long __RV_KABS16(unsigned long a)
  1391. {
  1392. unsigned long result;
  1393. __ASM volatile("kabs16 %0, %1" : "=r"(result) : "r"(a));
  1394. return result;
  1395. }
  1396. /* ===== Inline Function End for 3.24. KABS16 ===== */
  1397. /* ===== Inline Function Start for 3.25. KABSW ===== */
  1398. /**
  1399. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
  1400. * \brief KABSW (Scalar 32-bit Absolute Value with Saturation)
  1401. * \details
  1402. * **Type**: DSP
  1403. *
  1404. * **Syntax**:\n
  1405. * ~~~
  1406. * KABSW Rd, Rs1
  1407. * ~~~
  1408. *
  1409. * **Purpose**:\n
  1410. * Get the absolute value of a signed 32-bit integer in a general register.
  1411. *
  1412. * **Description**:\n
  1413. * This instruction calculates the absolute value of a signed 32-bit integer stored in Rs1.
  1414. * The result is sign-extended (for RV64) and written to Rd. This instruction with the minimum
  1415. * negative integer input of 0x80000000 will produce a saturated output of maximum positive integer
  1416. * of 0x7fffffff and the OV flag will be set to 1.
  1417. *
  1418. * **Operations**:\n
  1419. * ~~~
  1420. * if (Rs1.W[0] >= 0) {
  1421. * res = Rs1.W[0];
  1422. * } else {
  * if (Rs1.W[0] == 0x80000000) {
  1424. * res = 0x7fffffff;
  1425. * OV = 1;
  1426. * } else {
  1427. * res = -Rs1.W[0];
  1428. * }
  1429. * }
  1430. * Rd = SE32(res);
  1431. * ~~~
  1432. *
  1433. * \param [in] a signed long type of value stored in a
  1434. * \return value stored in unsigned long type
  1435. */
  1436. __STATIC_FORCEINLINE unsigned long __RV_KABSW(signed long a)
  1437. {
  1438. unsigned long result;
  1439. __ASM volatile("kabsw %0, %1" : "=r"(result) : "r"(a));
  1440. return result;
  1441. }
  1442. /* ===== Inline Function End for 3.25. KABSW ===== */
  1443. /* ===== Inline Function Start for 3.26. KADD8 ===== */
  1444. /**
  1445. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
  1446. * \brief KADD8 (SIMD 8-bit Signed Saturating Addition)
  1447. * \details
  1448. * **Type**: SIMD
  1449. *
  1450. * **Syntax**:\n
  1451. * ~~~
  1452. * KADD8 Rd, Rs1, Rs2
  1453. * ~~~
  1454. *
  1455. * **Purpose**:\n
  1456. * Do 8-bit signed integer element saturating additions simultaneously.
  1457. *
  1458. * **Description**:\n
  1459. * This instruction adds the 8-bit signed integer elements in Rs1 with the 8-bit signed
  1460. * integer elements in Rs2. If any of the results are beyond the Q7 number range (-2^7 <= Q7 <= 2^7-1), they
  1461. * are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
  1462. *
  1463. * **Operations**:\n
  1464. * ~~~
  1465. * res[x] = Rs1.B[x] + Rs2.B[x];
  1466. * if (res[x] > 127) {
  1467. * res[x] = 127;
  1468. * OV = 1;
  1469. * } else if (res[x] < -128) {
  1470. * res[x] = -128;
  1471. * OV = 1;
  1472. * }
  1473. * Rd.B[x] = res[x];
  1474. * for RV32: x=3...0,
  1475. * for RV64: x=7...0
  1476. * ~~~
  1477. *
  1478. * \param [in] a unsigned long type of value stored in a
  1479. * \param [in] b unsigned long type of value stored in b
  1480. * \return value stored in unsigned long type
  1481. */
  1482. __STATIC_FORCEINLINE unsigned long __RV_KADD8(unsigned long a, unsigned long b)
  1483. {
  1484. unsigned long result;
  1485. __ASM volatile("kadd8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  1486. return result;
  1487. }
  1488. /* ===== Inline Function End for 3.26. KADD8 ===== */
  1489. /* ===== Inline Function Start for 3.27. KADD16 ===== */
  1490. /**
  1491. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  1492. * \brief KADD16 (SIMD 16-bit Signed Saturating Addition)
  1493. * \details
  1494. * **Type**: SIMD
  1495. *
  1496. * **Syntax**:\n
  1497. * ~~~
  1498. * KADD16 Rd, Rs1, Rs2
  1499. * ~~~
  1500. *
  1501. * **Purpose**:\n
  1502. * Do 16-bit signed integer element saturating additions simultaneously.
  1503. *
  1504. * **Description**:\n
  1505. * This instruction adds the 16-bit signed integer elements in Rs1 with the 16-bit signed
  1506. * integer elements in Rs2. If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= 2^15-1),
  1507. * they are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
  1508. *
  1509. * **Operations**:\n
  1510. * ~~~
  1511. * res[x] = Rs1.H[x] + Rs2.H[x];
  1512. * if (res[x] > 32767) {
  1513. * res[x] = 32767;
  1514. * OV = 1;
  1515. * } else if (res[x] < -32768) {
  1516. * res[x] = -32768;
  1517. * OV = 1;
  1518. * }
  1519. * Rd.H[x] = res[x];
  1520. * for RV32: x=1...0,
  1521. * for RV64: x=3...0
  1522. * ~~~
  1523. *
  1524. * \param [in] a unsigned long type of value stored in a
  1525. * \param [in] b unsigned long type of value stored in b
  1526. * \return value stored in unsigned long type
  1527. */
  1528. __STATIC_FORCEINLINE unsigned long __RV_KADD16(unsigned long a, unsigned long b)
  1529. {
  1530. unsigned long result;
  1531. __ASM volatile("kadd16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  1532. return result;
  1533. }
  1534. /* ===== Inline Function End for 3.27. KADD16 ===== */
  1535. /* ===== Inline Function Start for 3.28. KADD64 ===== */
  1536. /**
  1537. * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
  1538. * \brief KADD64 (64-bit Signed Saturating Addition)
  1539. * \details
  1540. * **Type**: DSP (64-bit Profile)
  1541. *
  1542. * **Syntax**:\n
  1543. * ~~~
  1544. * KADD64 Rd, Rs1, Rs2
  1545. * ~~~
  1546. *
  1547. * **Purpose**:\n
  1548. * Add two 64-bit signed integers. The result is saturated to the Q63 range.
  1549. *
  1550. * **RV32 Description**:\n
  1551. * This instruction adds the 64-bit signed integer of an even/odd pair of registers
  1552. * specified by Rs1(4,1) with the 64-bit signed integer of an even/odd pair of registers specified by
  1553. * Rs2(4,1). If the 64-bit result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the
  1554. * range and the OV bit is set to 1. The saturated result is written to an even/odd pair of registers
  1555. * specified by Rd(4,1).
  1556. * Rx(4,1), i.e., value d, determines the even/odd pair group of two registers. Specifically, the register
  1557. * pair includes register 2d and 2d+1.
  1558. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
  1559. * of the pair contains the low 32-bit of the result.
  1560. *
  1561. * **RV64 Description**:\n
  1562. * This instruction adds the 64-bit signed integer in Rs1 with the 64-bit signed
  1563. * integer in Rs2. If the result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the
  1564. * range and the OV bit is set to 1. The saturated result is written to Rd.
  1565. *
  1566. * **Operations**:\n
  1567. * ~~~
  1568. * RV32:
  1569. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  1570. * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
  1571. * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
  1572. * result = R[a_H].R[a_L] + R[b_H].R[b_L];
  1573. * if (result > (2^63)-1) {
  1574. * result = (2^63)-1; OV = 1;
  1575. * } else if (result < -2^63) {
  1576. * result = -2^63; OV = 1;
  1577. * }
  1578. * R[t_H].R[t_L] = result;
  1579. * RV64:
  1580. * result = Rs1 + Rs2;
  1581. * if (result > (2^63)-1) {
  1582. * result = (2^63)-1; OV = 1;
  1583. * } else if (result < -2^63) {
  1584. * result = -2^63; OV = 1;
  1585. * }
  1586. * Rd = result;
  1587. * ~~~
  1588. *
  1589. * \param [in] a long long type of value stored in a
  1590. * \param [in] b long long type of value stored in b
  1591. * \return value stored in long long type
  1592. */
  1593. __STATIC_FORCEINLINE long long __RV_KADD64(long long a, long long b)
  1594. {
  1595. long long result;
  1596. __ASM volatile("kadd64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  1597. return result;
  1598. }
  1599. /* ===== Inline Function End for 3.28. KADD64 ===== */
  1600. /* ===== Inline Function Start for 3.29. KADDH ===== */
  1601. /**
  1602. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
  1603. * \brief KADDH (Signed Addition with Q15 Saturation)
  1604. * \details
  1605. * **Type**: DSP
  1606. *
  1607. * **Syntax**:\n
  1608. * ~~~
  1609. * KADDH Rd, Rs1, Rs2
  1610. * ~~~
  1611. *
  1612. * **Purpose**:\n
  1613. * Add the signed lower 32-bit content of two registers with Q15 saturation.
  1614. *
  1615. * **Description**:\n
  1616. * The signed lower 32-bit content of Rs1 is added with the signed lower 32-bit content of
  1617. * Rs2. And the result is saturated to the 16-bit signed integer range of [-2^15, 2^15-1] and then sign-
  1618. * extended and written to Rd. If saturation happens, this instruction sets the OV flag.
  1619. *
  1620. * **Operations**:\n
  1621. * ~~~
  1622. * tmp = Rs1.W[0] + Rs2.W[0];
  1623. * if (tmp > 32767) {
  1624. * res = 32767;
  1625. * OV = 1;
  1626. * } else if (tmp < -32768) {
  1627. * res = -32768;
  * OV = 1;
  * } else {
  * res = tmp;
  * }
  * Rd = SE(res[15:0]);
  1633. * ~~~
  1634. *
  1635. * \param [in] a int type of value stored in a
  1636. * \param [in] b int type of value stored in b
  1637. * \return value stored in long type
  1638. */
  1639. __STATIC_FORCEINLINE long __RV_KADDH(int a, int b)
  1640. {
  1641. long result;
  1642. __ASM volatile("kaddh %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  1643. return result;
  1644. }
  1645. /* ===== Inline Function End for 3.29. KADDH ===== */
  1646. /* ===== Inline Function Start for 3.30. KADDW ===== */
  1647. /**
  1648. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
  1649. * \brief KADDW (Signed Addition with Q31 Saturation)
  1650. * \details
  1651. * **Type**: DSP
  1652. *
  1653. * **Syntax**:\n
  1654. * ~~~
  1655. * KADDW Rd, Rs1, Rs2
  1656. * ~~~
  1657. *
  1658. * **Purpose**:\n
  1659. * Add the lower 32-bit signed content of two registers with Q31 saturation.
  1660. *
  1661. * **Description**:\n
  1662. * The lower 32-bit signed content of Rs1 is added with the lower 32-bit signed content of
  1663. * Rs2. And the result is saturated to the 32-bit signed integer range of [-2^31, 2^31-1] and then sign-
  1664. * extended and written to Rd. If saturation happens, this instruction sets the OV flag.
  1665. *
  1666. * **Operations**:\n
  1667. * ~~~
  1668. * tmp = Rs1.W[0] + Rs2.W[0];
  1669. * if (tmp > (2^31)-1) {
  1670. * res = (2^31)-1;
  1671. * OV = 1;
  1672. * } else if (tmp < -2^31) {
  1673. * res = -2^31;
  * OV = 1;
  * } else {
  * res = tmp;
  * }
  * Rd = res[31:0]; // RV32
  * Rd = SE(res[31:0]); // RV64
  1680. * ~~~
  1681. *
  1682. * \param [in] a int type of value stored in a
  1683. * \param [in] b int type of value stored in b
  1684. * \return value stored in long type
  1685. */
  1686. __STATIC_FORCEINLINE long __RV_KADDW(int a, int b)
  1687. {
  1688. long result;
  1689. __ASM volatile("kaddw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  1690. return result;
  1691. }
  1692. /* ===== Inline Function End for 3.30. KADDW ===== */
  1693. /* ===== Inline Function Start for 3.31. KCRAS16 ===== */
  1694. /**
  1695. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  1696. * \brief KCRAS16 (SIMD 16-bit Signed Saturating Cross Addition & Subtraction)
  1697. * \details
  1698. * **Type**: SIMD
  1699. *
  1700. * **Syntax**:\n
  1701. * ~~~
  1702. * KCRAS16 Rd, Rs1, Rs2
  1703. * ~~~
  1704. *
  1705. * **Purpose**:\n
  1706. * Do 16-bit signed integer element saturating addition and 16-bit signed integer element
  1707. * saturating subtraction in a 32-bit chunk simultaneously. Operands are from crossed positions in 32-
  1708. * bit chunks.
  1709. *
  1710. * **Description**:\n
  1711. * This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in
  1712. * Rs1 with the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs2; at the same time, it
  1713. * subtracts the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs2 from the 16-bit signed
  1714. * integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the Q15 number
  1715. * range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated
  1716. * results are written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit chunks in Rd for
  1717. * subtraction.
  1718. *
  1719. * **Operations**:\n
  1720. * ~~~
  1721. * res1 = Rs1.W[x][31:16] + Rs2.W[x][15:0];
  1722. * res2 = Rs1.W[x][15:0] - Rs2.W[x][31:16];
  1723. * for (res in [res1, res2]) {
  1724. * if (res > (2^15)-1) {
  1725. * res = (2^15)-1;
  1726. * OV = 1;
  1727. * } else if (res < -2^15) {
  1728. * res = -2^15;
  1729. * OV = 1;
  1730. * }
  1731. * }
  1732. * Rd.W[x][31:16] = res1;
  1733. * Rd.W[x][15:0] = res2;
  1734. * for RV32, x=0
  1735. * for RV64, x=1...0
  1736. * ~~~
  1737. *
  1738. * \param [in] a unsigned long type of value stored in a
  1739. * \param [in] b unsigned long type of value stored in b
  1740. * \return value stored in unsigned long type
  1741. */
  1742. __STATIC_FORCEINLINE unsigned long __RV_KCRAS16(unsigned long a, unsigned long b)
  1743. {
  1744. unsigned long result;
  1745. __ASM volatile("kcras16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  1746. return result;
  1747. }
  1748. /* ===== Inline Function End for 3.31. KCRAS16 ===== */
  1749. /* ===== Inline Function Start for 3.32. KCRSA16 ===== */
  1750. /**
  1751. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  1752. * \brief KCRSA16 (SIMD 16-bit Signed Saturating Cross Subtraction & Addition)
  1753. * \details
  1754. * **Type**: SIMD
  1755. *
  1756. * **Syntax**:\n
  1757. * ~~~
  1758. * KCRSA16 Rd, Rs1, Rs2
  1759. * ~~~
  1760. *
  1761. * **Purpose**:\n
  1762. * Do 16-bit signed integer element saturating subtraction and 16-bit signed integer element
  1763. * saturating addition in a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit
  1764. * chunks.
  1765. *
  1766. * **Description**:\n
  1767. * This instruction subtracts the 16-bit signed integer element in [15:0] of 32-bit chunks
  1768. * in Rs2 from the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1; at the same time, it
  1769. * adds the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs2 with the 16-bit signed
  1770. * integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the Q15 number
  1771. * range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated
  1772. * results are written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of 32-bit chunks in Rd
  1773. * for addition.
  1774. *
  1775. * **Operations**:\n
  1776. * ~~~
  1777. * res1 = Rs1.W[x][31:16] - Rs2.W[x][15:0];
  1778. * res2 = Rs1.W[x][15:0] + Rs2.W[x][31:16];
  1779. * for (res in [res1, res2]) {
  1780. * if (res > (2^15)-1) {
  1781. * res = (2^15)-1;
  1782. * OV = 1;
  1783. * } else if (res < -2^15) {
  1784. * res = -2^15;
  1785. * OV = 1;
  1786. * }
  1787. * }
  1788. * Rd.W[x][31:16] = res1;
  1789. * Rd.W[x][15:0] = res2;
  1790. * for RV32, x=0
  1791. * for RV64, x=1...0
  1792. * ~~~
  1793. *
  1794. * \param [in] a unsigned long type of value stored in a
  1795. * \param [in] b unsigned long type of value stored in b
  1796. * \return value stored in unsigned long type
  1797. */
  1798. __STATIC_FORCEINLINE unsigned long __RV_KCRSA16(unsigned long a, unsigned long b)
  1799. {
  1800. unsigned long result;
  1801. __ASM volatile("kcrsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  1802. return result;
  1803. }
  1804. /* ===== Inline Function End for 3.32. KCRSA16 ===== */
  1805. /* ===== Inline Function Start for 3.33.1. KDMBB ===== */
  1806. /**
  1807. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
  1808. * \brief KDMBB (Signed Saturating Double Multiply B16 x B16)
  1809. * \details
  1810. * **Type**: DSP
  1811. *
  1812. * **Syntax**:\n
  1813. * ~~~
  1814. * KDMxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
  1815. * ~~~
  1816. *
  1817. * **Purpose**:\n
  1818. * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
  1819. * of the lower 32-bit chunk in registers and then double and saturate the Q31 result. The result is
  1820. * written into the destination register for RV32 or sign-extended to 64-bits and written into the
  1821. * destination register for RV64. If saturation happens, an overflow flag OV will be set.
  1822. *
  1823. * **Description**:\n
  1824. * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
  1825. * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then
  1826. * doubled and saturated into a Q31 value. The Q31 value is then written into Rd (sign-extended in
  1827. * RV64). When both the two Q15 inputs are 0x8000, saturation will happen. The result will be
  1828. * saturated to 0x7FFFFFFF and the overflow flag OV will be set.
  1829. *
  1830. * **Operations**:\n
  1831. * ~~~
  1832. * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMBB
  1833. * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMBT
  1834. * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMTT
  * if (0x8000 != aop || 0x8000 != bop) {
  1836. * Mresult = aop * bop;
  1837. * resQ31 = Mresult << 1;
  1838. * Rd = resQ31; // RV32
  1839. * Rd = SE(resQ31); // RV64
  1840. * } else {
  1841. * resQ31 = 0x7FFFFFFF;
  1842. * Rd = resQ31; // RV32
  1843. * Rd = SE(resQ31); // RV64
  1844. * OV = 1;
  1845. * }
  1846. * ~~~
  1847. *
  1848. * \param [in] a unsigned int type of value stored in a
  1849. * \param [in] b unsigned int type of value stored in b
  1850. * \return value stored in long type
  1851. */
  1852. __STATIC_FORCEINLINE long __RV_KDMBB(unsigned int a, unsigned int b)
  1853. {
  1854. long result;
  1855. __ASM volatile("kdmbb %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  1856. return result;
  1857. }
  1858. /* ===== Inline Function End for 3.33.1. KDMBB ===== */
  1859. /* ===== Inline Function Start for 3.33.2. KDMBT ===== */
  1860. /**
  1861. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
  1862. * \brief KDMBT (Signed Saturating Double Multiply B16 x T16)
  1863. * \details
  1864. * **Type**: DSP
  1865. *
  1866. * **Syntax**:\n
  1867. * ~~~
  1868. * KDMxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
  1869. * ~~~
  1870. *
  1871. * **Purpose**:\n
  1872. * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
  1873. * of the lower 32-bit chunk in registers and then double and saturate the Q31 result. The result is
  1874. * written into the destination register for RV32 or sign-extended to 64-bits and written into the
  1875. * destination register for RV64. If saturation happens, an overflow flag OV will be set.
  1876. *
  1877. * **Description**:\n
  1878. * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
  1879. * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then
  1880. * doubled and saturated into a Q31 value. The Q31 value is then written into Rd (sign-extended in
  1881. * RV64). When both the two Q15 inputs are 0x8000, saturation will happen. The result will be
  1882. * saturated to 0x7FFFFFFF and the overflow flag OV will be set.
  1883. *
  1884. * **Operations**:\n
  1885. * ~~~
  1886. * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMBB
  1887. * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMBT
  1888. * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMTT
  * if (0x8000 != aop || 0x8000 != bop) {
  1890. * Mresult = aop * bop;
  1891. * resQ31 = Mresult << 1;
  1892. * Rd = resQ31; // RV32
  1893. * Rd = SE(resQ31); // RV64
  1894. * } else {
  1895. * resQ31 = 0x7FFFFFFF;
  1896. * Rd = resQ31; // RV32
  1897. * Rd = SE(resQ31); // RV64
  1898. * OV = 1;
  1899. * }
  1900. * ~~~
  1901. *
  1902. * \param [in] a unsigned int type of value stored in a
  1903. * \param [in] b unsigned int type of value stored in b
  1904. * \return value stored in long type
  1905. */
  1906. __STATIC_FORCEINLINE long __RV_KDMBT(unsigned int a, unsigned int b)
  1907. {
  1908. long result;
  1909. __ASM volatile("kdmbt %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  1910. return result;
  1911. }
  1912. /* ===== Inline Function End for 3.33.2. KDMBT ===== */
  1913. /* ===== Inline Function Start for 3.33.3. KDMTT ===== */
  1914. /**
  1915. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
  1916. * \brief KDMTT (Signed Saturating Double Multiply T16 x T16)
  1917. * \details
  1918. * **Type**: DSP
  1919. *
  1920. * **Syntax**:\n
  1921. * ~~~
  1922. * KDMxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
  1923. * ~~~
  1924. *
  1925. * **Purpose**:\n
  1926. * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
  1927. * of the lower 32-bit chunk in registers and then double and saturate the Q31 result. The result is
  1928. * written into the destination register for RV32 or sign-extended to 64-bits and written into the
  1929. * destination register for RV64. If saturation happens, an overflow flag OV will be set.
  1930. *
  1931. * **Description**:\n
  1932. * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
  1933. * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then
  1934. * doubled and saturated into a Q31 value. The Q31 value is then written into Rd (sign-extended in
  1935. * RV64). When both the two Q15 inputs are 0x8000, saturation will happen. The result will be
  1936. * saturated to 0x7FFFFFFF and the overflow flag OV will be set.
  1937. *
  1938. * **Operations**:\n
  1939. * ~~~
  1940. * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMBB
  1941. * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMBT
  1942. * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMTT
  * if (0x8000 != aop || 0x8000 != bop) {
  1944. * Mresult = aop * bop;
  1945. * resQ31 = Mresult << 1;
  1946. * Rd = resQ31; // RV32
  1947. * Rd = SE(resQ31); // RV64
  1948. * } else {
  1949. * resQ31 = 0x7FFFFFFF;
  1950. * Rd = resQ31; // RV32
  1951. * Rd = SE(resQ31); // RV64
  1952. * OV = 1;
  1953. * }
  1954. * ~~~
  1955. *
  1956. * \param [in] a unsigned int type of value stored in a
  1957. * \param [in] b unsigned int type of value stored in b
  1958. * \return value stored in long type
  1959. */
  1960. __STATIC_FORCEINLINE long __RV_KDMTT(unsigned int a, unsigned int b)
  1961. {
  1962. long result;
  1963. __ASM volatile("kdmtt %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  1964. return result;
  1965. }
  1966. /* ===== Inline Function End for 3.33.3. KDMTT ===== */
  1967. /* ===== Inline Function Start for 3.34.1. KDMABB ===== */
  1968. /**
  1969. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
  1970. * \brief KDMABB (Signed Saturating Double Multiply Addition B16 x B16)
  1971. * \details
  1972. * **Type**: DSP
  1973. *
  1974. * **Syntax**:\n
  1975. * ~~~
  1976. * KDMAxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
  1977. * ~~~
  1978. *
  1979. * **Purpose**:\n
  1980. * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
  1981. * of the lower 32-bit chunk in registers and then double and saturate the Q31 result, add the result
  1982. * with the sign-extended lower 32-bit chunk destination register and write the saturated addition
  1983. * result into the destination register. If saturation happens, an overflow flag OV will be set.
  1984. *
  1985. * **Description**:\n
  1986. * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
  1987. * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then
  1988. * doubled and saturated into a Q31 value. The Q31 value is then added with the content of Rd. If the
  1989. * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and
  1990. * the OV flag is set to 1. The result after saturation is written to Rd.
  1991. * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be
  1992. * set.
  1993. *
  1994. * **Operations**:\n
  1995. * ~~~
  1996. * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMABB
  1997. * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMABT
  1998. * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMATT
  * if (0x8000 != aop || 0x8000 != bop) {
  2000. * Mresult = aop * bop;
  2001. * resQ31 = Mresult << 1;
  2002. * } else {
  2003. * resQ31 = 0x7FFFFFFF;
  2004. * OV = 1;
  2005. * }
  2006. * resadd = Rd + resQ31; // RV32
  2007. * resadd = Rd.W[0] + resQ31; // RV64
  2008. * if (resadd > (2^31)-1) {
  2009. * resadd = (2^31)-1;
  2010. * OV = 1;
  2011. * } else if (resadd < -2^31) {
  2012. * resadd = -2^31;
  2013. * OV = 1;
  2014. * }
  2015. * Rd = resadd; // RV32
  2016. * Rd = SE(resadd); // RV64
  2017. * ~~~
  2018. *
  2019. * \param [in] t long type of value stored in t
  2020. * \param [in] a unsigned int type of value stored in a
  2021. * \param [in] b unsigned int type of value stored in b
  2022. * \return value stored in long type
  2023. */
  2024. __STATIC_FORCEINLINE long __RV_KDMABB(long t, unsigned int a, unsigned int b)
  2025. {
  2026. __ASM volatile("kdmabb %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  2027. return t;
  2028. }
  2029. /* ===== Inline Function End for 3.34.1. KDMABB ===== */
  2030. /* ===== Inline Function Start for 3.34.2. KDMABT ===== */
  2031. /**
  2032. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
  2033. * \brief KDMABT (Signed Saturating Double Multiply Addition B16 x T16)
  2034. * \details
  2035. * **Type**: DSP
  2036. *
  2037. * **Syntax**:\n
  2038. * ~~~
  2039. * KDMAxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
  2040. * ~~~
  2041. *
  2042. * **Purpose**:\n
  2043. * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
  2044. * of the lower 32-bit chunk in registers and then double and saturate the Q31 result, add the result
  2045. * with the sign-extended lower 32-bit chunk destination register and write the saturated addition
  2046. * result into the destination register. If saturation happens, an overflow flag OV will be set.
  2047. *
  2048. * **Description**:\n
  2049. * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
  2050. * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then
  2051. * doubled and saturated into a Q31 value. The Q31 value is then added with the content of Rd. If the
  2052. * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and
  2053. * the OV flag is set to 1. The result after saturation is written to Rd.
  2054. * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be
  2055. * set.
  2056. *
  2057. * **Operations**:\n
  2058. * ~~~
  2059. * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMABB
  2060. * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMABT
  2061. * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMATT
  2062. * If (0x8000 != aop | 0x8000 != bop) {
  2063. * Mresult = aop * bop;
  2064. * resQ31 = Mresult << 1;
  2065. * } else {
  2066. * resQ31 = 0x7FFFFFFF;
  2067. * OV = 1;
  2068. * }
  2069. * resadd = Rd + resQ31; // RV32
  2070. * resadd = Rd.W[0] + resQ31; // RV64
  2071. * if (resadd > (2^31)-1) {
  2072. * resadd = (2^31)-1;
  2073. * OV = 1;
  2074. * } else if (resadd < -2^31) {
  2075. * resadd = -2^31;
  2076. * OV = 1;
  2077. * }
  2078. * Rd = resadd; // RV32
  2079. * Rd = SE(resadd); // RV64
  2080. * ~~~
  2081. *
  2082. * \param [in] t long type of value stored in t
  2083. * \param [in] a unsigned int type of value stored in a
  2084. * \param [in] b unsigned int type of value stored in b
  2085. * \return value stored in long type
  2086. */
  2087. __STATIC_FORCEINLINE long __RV_KDMABT(long t, unsigned int a, unsigned int b)
  2088. {
  2089. __ASM volatile("kdmabt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  2090. return t;
  2091. }
  2092. /* ===== Inline Function End for 3.34.2. KDMABT ===== */
  2093. /* ===== Inline Function Start for 3.34.3. KDMATT ===== */
  2094. /**
  2095. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
  2096. * \brief KDMATT (Signed Saturating Double Multiply Addition T16 x T16)
  2097. * \details
  2098. * **Type**: DSP
  2099. *
  2100. * **Syntax**:\n
  2101. * ~~~
  2102. * KDMAxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
  2103. * ~~~
  2104. *
  2105. * **Purpose**:\n
  2106. * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
  2107. * of the lower 32-bit chunk in registers and then double and saturate the Q31 result, add the result
  2108. * with the sign-extended lower 32-bit chunk destination register and write the saturated addition
  2109. * result into the destination register. If saturation happens, an overflow flag OV will be set.
  2110. *
  2111. * **Description**:\n
  2112. * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
  2113. * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then
  2114. * doubled and saturated into a Q31 value. The Q31 value is then added with the content of Rd. If the
  2115. * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and
  2116. * the OV flag is set to 1. The result after saturation is written to Rd.
  2117. * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be
  2118. * set.
  2119. *
  2120. * **Operations**:\n
  2121. * ~~~
  2122. * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMABB
  2123. * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMABT
  2124. * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMATT
  2125. * If (0x8000 != aop | 0x8000 != bop) {
  2126. * Mresult = aop * bop;
  2127. * resQ31 = Mresult << 1;
  2128. * } else {
  2129. * resQ31 = 0x7FFFFFFF;
  2130. * OV = 1;
  2131. * }
  2132. * resadd = Rd + resQ31; // RV32
  2133. * resadd = Rd.W[0] + resQ31; // RV64
  2134. * if (resadd > (2^31)-1) {
  2135. * resadd = (2^31)-1;
  2136. * OV = 1;
  2137. * } else if (resadd < -2^31) {
  2138. * resadd = -2^31;
  2139. * OV = 1;
  2140. * }
  2141. * Rd = resadd; // RV32
  2142. * Rd = SE(resadd); // RV64
  2143. * ~~~
  2144. *
  2145. * \param [in] t long type of value stored in t
  2146. * \param [in] a unsigned int type of value stored in a
  2147. * \param [in] b unsigned int type of value stored in b
  2148. * \return value stored in long type
  2149. */
  2150. __STATIC_FORCEINLINE long __RV_KDMATT(long t, unsigned int a, unsigned int b)
  2151. {
  2152. __ASM volatile("kdmatt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  2153. return t;
  2154. }
  2155. /* ===== Inline Function End for 3.34.3. KDMATT ===== */
  2156. /* ===== Inline Function Start for 3.35.1. KHM8 ===== */
  2157. /**
  2158. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY
  2159. * \brief KHM8 (SIMD Signed Saturating Q7 Multiply)
  2160. * \details
  2161. * **Type**: SIMD
  2162. *
  2163. * **Syntax**:\n
  2164. * ~~~
  2165. * KHM8 Rd, Rs1, Rs2
  2166. * KHMX8 Rd, Rs1, Rs2
  2167. * ~~~
  2168. *
  2169. * **Purpose**:\n
  2170. * Do Q7xQ7 element multiplications simultaneously. The Q14 results are then reduced to Q7
  2171. * numbers again.
  2172. *
  2173. * **Description**:\n
  2174. * For the `KHM8` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1
  2175. * with the top 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7
  2176. * content of 16-bit chunks in Rs1 with the bottom 8-bit Q7 content of 16-bit chunks in Rs2.
  2177. * For the `KHMX8` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1 with the
  2178. * bottom 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7
  2179. * content of 16-bit chunks in Rs1 with the top 8-bit Q7 content of 16-bit chunks in Rs2.
  2180. * The Q14 results are then right-shifted 7-bits and saturated into Q7 values. The Q7 results are then
  2181. * written into Rd. When both the two Q7 inputs of a multiplication are 0x80, saturation will happen.
  2182. * The result will be saturated to 0x7F and the overflow flag OV will be set.
  2183. *
  2184. * **Operations**:\n
  2185. * ~~~
  2186. * if (is `KHM8`) {
  2187. * op1t = Rs1.B[x+1]; op2t = Rs2.B[x+1]; // top
  2188. * op1b = Rs1.B[x]; op2b = Rs2.B[x]; // bottom
  2189. * } else if (is `KHMX8`) {
  2190. * op1t = Rs1.B[x+1]; op2t = Rs2.B[x]; // Rs1 top
  2191. * op1b = Rs1.B[x]; op2b = Rs2.B[x+1]; // Rs1 bottom
  2192. * }
  2193. * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  2194. * if (0x80 != aop | 0x80 != bop) {
  2195. * res = (aop s* bop) >> 7;
  2196. * } else {
  2197. * res= 0x7F;
  2198. * OV = 1;
  2199. * }
  2200. * }
  2201. * Rd.H[x/2] = concat(rest, resb);
  2202. * for RV32, x=0,2
  2203. * for RV64, x=0,2,4,6
  2204. * ~~~
  2205. *
  2206. * \param [in] a unsigned long type of value stored in a
  2207. * \param [in] b unsigned long type of value stored in b
  2208. * \return value stored in unsigned long type
  2209. */
  2210. __STATIC_FORCEINLINE unsigned long __RV_KHM8(unsigned long a, unsigned long b)
  2211. {
  2212. unsigned long result;
  2213. __ASM volatile("khm8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  2214. return result;
  2215. }
  2216. /* ===== Inline Function End for 3.35.1. KHM8 ===== */
  2217. /* ===== Inline Function Start for 3.35.2. KHMX8 ===== */
  2218. /**
  2219. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY
  2220. * \brief KHMX8 (SIMD Signed Saturating Crossed Q7 Multiply)
  2221. * \details
  2222. * **Type**: SIMD
  2223. *
  2224. * **Syntax**:\n
  2225. * ~~~
  2226. * KHM8 Rd, Rs1, Rs2
  2227. * KHMX8 Rd, Rs1, Rs2
  2228. * ~~~
  2229. *
  2230. * **Purpose**:\n
  2231. * Do Q7xQ7 element multiplications simultaneously. The Q14 results are then reduced to Q7
  2232. * numbers again.
  2233. *
  2234. * **Description**:\n
  2235. * For the `KHM8` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1
  2236. * with the top 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7
  2237. * content of 16-bit chunks in Rs1 with the bottom 8-bit Q7 content of 16-bit chunks in Rs2.
  2238. * For the `KHMX8` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1 with the
  2239. * bottom 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7
  2240. * content of 16-bit chunks in Rs1 with the top 8-bit Q7 content of 16-bit chunks in Rs2.
  2241. * The Q14 results are then right-shifted 7-bits and saturated into Q7 values. The Q7 results are then
  2242. * written into Rd. When both the two Q7 inputs of a multiplication are 0x80, saturation will happen.
  2243. * The result will be saturated to 0x7F and the overflow flag OV will be set.
  2244. *
  2245. * **Operations**:\n
  2246. * ~~~
  2247. * if (is `KHM8`) {
  2248. * op1t = Rs1.B[x+1]; op2t = Rs2.B[x+1]; // top
  2249. * op1b = Rs1.B[x]; op2b = Rs2.B[x]; // bottom
  2250. * } else if (is `KHMX8`) {
  2251. * op1t = Rs1.B[x+1]; op2t = Rs2.B[x]; // Rs1 top
  2252. * op1b = Rs1.B[x]; op2b = Rs2.B[x+1]; // Rs1 bottom
  2253. * }
  2254. * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  2255. * if (0x80 != aop | 0x80 != bop) {
  2256. * res = (aop s* bop) >> 7;
  2257. * } else {
  2258. * res= 0x7F;
  2259. * OV = 1;
  2260. * }
  2261. * }
  2262. * Rd.H[x/2] = concat(rest, resb);
  2263. * for RV32, x=0,2
  2264. * for RV64, x=0,2,4,6
  2265. * ~~~
  2266. *
  2267. * \param [in] a unsigned long type of value stored in a
  2268. * \param [in] b unsigned long type of value stored in b
  2269. * \return value stored in unsigned long type
  2270. */
  2271. __STATIC_FORCEINLINE unsigned long __RV_KHMX8(unsigned long a, unsigned long b)
  2272. {
  2273. unsigned long result;
  2274. __ASM volatile("khmx8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  2275. return result;
  2276. }
  2277. /* ===== Inline Function End for 3.35.2. KHMX8 ===== */
  2278. /* ===== Inline Function Start for 3.36.1. KHM16 ===== */
  2279. /**
  2280. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY
  2281. * \brief KHM16 (SIMD Signed Saturating Q15 Multiply)
  2282. * \details
  2283. * **Type**: SIMD
  2284. *
  2285. * **Syntax**:\n
  2286. * ~~~
  2287. * KHM16 Rd, Rs1, Rs2
  2288. * KHMX16 Rd, Rs1, Rs2
  2289. * ~~~
  2290. *
  2291. * **Purpose**:\n
  2292. * Do Q15xQ15 element multiplications simultaneously. The Q30 results are then reduced to
  2293. * Q15 numbers again.
  2294. *
  2295. * **Description**:\n
  2296. * For the `KHM16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in
  2297. * Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom
  2298. * 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content of 32-bit chunks in
  2299. * Rs2.
  2300. * For the `KHMX16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the
  2301. * bottom 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom 16-bit Q15
  2302. * content of 32-bit chunks in Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2.
  2303. * The Q30 results are then right-shifted 15-bits and saturated into Q15 values. The Q15 results are
  2304. * then written into Rd. When both the two Q15 inputs of a multiplication are 0x8000, saturation will
  2305. * happen. The result will be saturated to 0x7FFF and the overflow flag OV will be set.
  2306. *
  2307. * **Operations**:\n
  2308. * ~~~
  2309. * if (is `KHM16`) {
  2310. * op1t = Rs1.H[x+1]; op2t = Rs2.H[x+1]; // top
  2311. * op1b = Rs1.H[x]; op2b = Rs2.H[x]; // bottom
  2312. * } else if (is `KHMX16`) {
  2313. * op1t = Rs1.H[x+1]; op2t = Rs2.H[x]; // Rs1 top
  2314. * op1b = Rs1.H[x]; op2b = Rs2.H[x+1]; // Rs1 bottom
  2315. * }
  2316. * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  2317. * if (0x8000 != aop | 0x8000 != bop) {
  2318. * res = (aop s* bop) >> 15;
  2319. * } else {
  2320. * res= 0x7FFF;
  2321. * OV = 1;
  2322. * }
  2323. * }
  2324. * Rd.W[x/2] = concat(rest, resb);
  2325. * for RV32: x=0
  2326. * for RV64: x=0,2
  2327. * ~~~
  2328. *
  2329. * \param [in] a unsigned long type of value stored in a
  2330. * \param [in] b unsigned long type of value stored in b
  2331. * \return value stored in unsigned long type
  2332. */
  2333. __STATIC_FORCEINLINE unsigned long __RV_KHM16(unsigned long a, unsigned long b)
  2334. {
  2335. unsigned long result;
  2336. __ASM volatile("khm16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  2337. return result;
  2338. }
  2339. /* ===== Inline Function End for 3.36.1. KHM16 ===== */
  2340. /* ===== Inline Function Start for 3.36.2. KHMX16 ===== */
  2341. /**
  2342. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY
  2343. * \brief KHMX16 (SIMD Signed Saturating Crossed Q15 Multiply)
  2344. * \details
  2345. * **Type**: SIMD
  2346. *
  2347. * **Syntax**:\n
  2348. * ~~~
  2349. * KHM16 Rd, Rs1, Rs2
  2350. * KHMX16 Rd, Rs1, Rs2
  2351. * ~~~
  2352. *
  2353. * **Purpose**:\n
  2354. * Do Q15xQ15 element multiplications simultaneously. The Q30 results are then reduced to
  2355. * Q15 numbers again.
  2356. *
  2357. * **Description**:\n
  2358. * For the `KHM16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in
  2359. * Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom
  2360. * 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content of 32-bit chunks in
  2361. * Rs2.
  2362. * For the `KHMX16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the
  2363. * bottom 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom 16-bit Q15
  2364. * content of 32-bit chunks in Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2.
  2365. * The Q30 results are then right-shifted 15-bits and saturated into Q15 values. The Q15 results are
  2366. * then written into Rd. When both the two Q15 inputs of a multiplication are 0x8000, saturation will
  2367. * happen. The result will be saturated to 0x7FFF and the overflow flag OV will be set.
  2368. *
  2369. * **Operations**:\n
  2370. * ~~~
  2371. * if (is `KHM16`) {
  2372. * op1t = Rs1.H[x+1]; op2t = Rs2.H[x+1]; // top
  2373. * op1b = Rs1.H[x]; op2b = Rs2.H[x]; // bottom
  2374. * } else if (is `KHMX16`) {
  2375. * op1t = Rs1.H[x+1]; op2t = Rs2.H[x]; // Rs1 top
  2376. * op1b = Rs1.H[x]; op2b = Rs2.H[x+1]; // Rs1 bottom
  2377. * }
  2378. * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  2379. * if (0x8000 != aop | 0x8000 != bop) {
  2380. * res = (aop s* bop) >> 15;
  2381. * } else {
  2382. * res= 0x7FFF;
  2383. * OV = 1;
  2384. * }
  2385. * }
  2386. * Rd.W[x/2] = concat(rest, resb);
  2387. * for RV32: x=0
  2388. * for RV64: x=0,2
  2389. * ~~~
  2390. *
  2391. * \param [in] a unsigned long type of value stored in a
  2392. * \param [in] b unsigned long type of value stored in b
  2393. * \return value stored in unsigned long type
  2394. */
  2395. __STATIC_FORCEINLINE unsigned long __RV_KHMX16(unsigned long a, unsigned long b)
  2396. {
  2397. unsigned long result;
  2398. __ASM volatile("khmx16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  2399. return result;
  2400. }
  2401. /* ===== Inline Function End for 3.36.2. KHMX16 ===== */
  2402. /* ===== Inline Function Start for 3.37.1. KHMBB ===== */
  2403. /**
  2404. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
  2405. * \brief KHMBB (Signed Saturating Half Multiply B16 x B16)
  2406. * \details
  2407. * **Type**: DSP
  2408. *
  2409. * **Syntax**:\n
  2410. * ~~~
  2411. * KHMxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
  2412. * ~~~
  2413. *
  2414. * **Purpose**:\n
  2415. * Multiply the signed Q15 number contents of two 16-bit data in the corresponding portion
  2416. * of the lower 32-bit chunk in registers and then right-shift 15 bits to turn the Q30 result into a Q15
  2417. * number again and saturate the Q15 result into the destination register. If saturation happens, an
  2418. * overflow flag OV will be set.
  2419. *
  2420. * **Description**:\n
  2421. * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
  2422. * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then right-
  2423. * shifted 15-bits and saturated into a Q15 value. The Q15 value is then sign-extended and written into
  2424. * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated
  2425. * to 0x7FFF and the overflow flag OV will be set.
  2426. *
  2427. * **Operations**:\n
  2428. * ~~~
  2429. * aop = Rs1.H[0]; bop = Rs2.H[0]; // KHMBB
  2430. * aop = Rs1.H[0]; bop = Rs2.H[1]; // KHMBT
  2431. * aop = Rs1.H[1]; bop = Rs2.H[1]; // KHMTT
  2432. * If (0x8000 != aop | 0x8000 != bop) {
  2433. * Mresult[31:0] = aop * bop;
  2434. * res[15:0] = Mresult[30:15];
  2435. * } else {
  2436. * res[15:0] = 0x7FFF;
  2437. * OV = 1;
  2438. * }
  2439. * Rd = SE32(res[15:0]); // RV32
  2440. * Rd = SE64(res[15:0]); // RV64
  2441. * ~~~
  2442. *
  2443. * \param [in] a unsigned int type of value stored in a
  2444. * \param [in] b unsigned int type of value stored in b
  2445. * \return value stored in long type
  2446. */
  2447. __STATIC_FORCEINLINE long __RV_KHMBB(unsigned int a, unsigned int b)
  2448. {
  2449. long result;
  2450. __ASM volatile("khmbb %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  2451. return result;
  2452. }
  2453. /* ===== Inline Function End for 3.37.1. KHMBB ===== */
  2454. /* ===== Inline Function Start for 3.37.2. KHMBT ===== */
  2455. /**
  2456. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
  2457. * \brief KHMBT (Signed Saturating Half Multiply B16 x T16)
  2458. * \details
  2459. * **Type**: DSP
  2460. *
  2461. * **Syntax**:\n
  2462. * ~~~
  2463. * KHMxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
  2464. * ~~~
  2465. *
  2466. * **Purpose**:\n
  2467. * Multiply the signed Q15 number contents of two 16-bit data in the corresponding portion
  2468. * of the lower 32-bit chunk in registers and then right-shift 15 bits to turn the Q30 result into a Q15
  2469. * number again and saturate the Q15 result into the destination register. If saturation happens, an
  2470. * overflow flag OV will be set.
  2471. *
  2472. * **Description**:\n
  2473. * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
  2474. * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then right-
  2475. * shifted 15-bits and saturated into a Q15 value. The Q15 value is then sign-extended and written into
  2476. * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated
  2477. * to 0x7FFF and the overflow flag OV will be set.
  2478. *
  2479. * **Operations**:\n
  2480. * ~~~
  2481. * aop = Rs1.H[0]; bop = Rs2.H[0]; // KHMBB
  2482. * aop = Rs1.H[0]; bop = Rs2.H[1]; // KHMBT
  2483. * aop = Rs1.H[1]; bop = Rs2.H[1]; // KHMTT
  2484. * If (0x8000 != aop | 0x8000 != bop) {
  2485. * Mresult[31:0] = aop * bop;
  2486. * res[15:0] = Mresult[30:15];
  2487. * } else {
  2488. * res[15:0] = 0x7FFF;
  2489. * OV = 1;
  2490. * }
  2491. * Rd = SE32(res[15:0]); // RV32
  2492. * Rd = SE64(res[15:0]); // RV64
  2493. * ~~~
  2494. *
  2495. * \param [in] a unsigned int type of value stored in a
  2496. * \param [in] b unsigned int type of value stored in b
  2497. * \return value stored in long type
  2498. */
  2499. __STATIC_FORCEINLINE long __RV_KHMBT(unsigned int a, unsigned int b)
  2500. {
  2501. long result;
  2502. __ASM volatile("khmbt %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  2503. return result;
  2504. }
  2505. /* ===== Inline Function End for 3.37.2. KHMBT ===== */
  2506. /* ===== Inline Function Start for 3.37.3. KHMTT ===== */
  2507. /**
  2508. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
  2509. * \brief KHMTT (Signed Saturating Half Multiply T16 x T16)
  2510. * \details
  2511. * **Type**: DSP
  2512. *
  2513. * **Syntax**:\n
  2514. * ~~~
  2515. * KHMxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
  2516. * ~~~
  2517. *
  2518. * **Purpose**:\n
  2519. * Multiply the signed Q15 number contents of two 16-bit data in the corresponding portion
  2520. * of the lower 32-bit chunk in registers and then right-shift 15 bits to turn the Q30 result into a Q15
  2521. * number again and saturate the Q15 result into the destination register. If saturation happens, an
  2522. * overflow flag OV will be set.
  2523. *
  2524. * **Description**:\n
  2525. * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
  2526. * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then right-
  2527. * shifted 15-bits and saturated into a Q15 value. The Q15 value is then sign-extended and written into
  2528. * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated
  2529. * to 0x7FFF and the overflow flag OV will be set.
  2530. *
  2531. * **Operations**:\n
  2532. * ~~~
  2533. * aop = Rs1.H[0]; bop = Rs2.H[0]; // KHMBB
  2534. * aop = Rs1.H[0]; bop = Rs2.H[1]; // KHMBT
  2535. * aop = Rs1.H[1]; bop = Rs2.H[1]; // KHMTT
  2536. * If (0x8000 != aop | 0x8000 != bop) {
  2537. * Mresult[31:0] = aop * bop;
  2538. * res[15:0] = Mresult[30:15];
  2539. * } else {
  2540. * res[15:0] = 0x7FFF;
  2541. * OV = 1;
  2542. * }
  2543. * Rd = SE32(res[15:0]); // RV32
  2544. * Rd = SE64(res[15:0]); // RV64
  2545. * ~~~
  2546. *
  2547. * \param [in] a unsigned int type of value stored in a
  2548. * \param [in] b unsigned int type of value stored in b
  2549. * \return value stored in long type
  2550. */
  2551. __STATIC_FORCEINLINE long __RV_KHMTT(unsigned int a, unsigned int b)
  2552. {
  2553. long result;
  2554. __ASM volatile("khmtt %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  2555. return result;
  2556. }
  2557. /* ===== Inline Function End for 3.37.3. KHMTT ===== */
  2558. /* ===== Inline Function Start for 3.38.1. KMABB ===== */
  2559. /**
  2560. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
  2561. * \brief KMABB (SIMD Saturating Signed Multiply Bottom Halfs & Add)
  2562. * \details
  2563. * **Type**: SIMD
  2564. *
  2565. * **Syntax**:\n
  2566. * ~~~
  2567. * KMABB Rd, Rs1, Rs2
  2568. * KMABT Rd, Rs1, Rs2
  2569. * KMATT Rd, Rs1, Rs2
  2570. * ~~~
  2571. *
  2572. * **Purpose**:\n
  2573. * Multiply the signed 16-bit content of 32-bit elements in a register with the 16-bit content
  2574. * of 32-bit elements in another register and add the result to the content of 32-bit elements in the
  2575. * third register. The addition result may be saturated and is written to the third register.
  2576. * * KMABB: rd.W[x] + bottom*bottom (per 32-bit element)
  2577. * * KMABT: rd.W[x] + bottom*top (per 32-bit element)
  2578. * * KMATT: rd.W[x] + top*top (per 32-bit element)
  2579. *
  2580. * **Description**:\n
  2581. * For the `KMABB` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
  2582. * the bottom 16-bit content of 32-bit elements in Rs2.
  2583. * For the `KMABT` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
  2584. * the top 16-bit content of 32-bit elements in Rs2.
  2585. * For the `KMATT` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
  2586. * top 16-bit content of 32-bit elements in Rs2.
  2587. * The multiplication result is added to the content of 32-bit elements in Rd. If the addition result is
  2588. * beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to
  2589. * 1. The results after saturation are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as
  2590. * signed integers.
  2591. *
  2592. * **Operations**:\n
  2593. * ~~~
  2594. * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]); // KMABB
  2595. * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[1]); // KMABT
  2596. * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]); // KMATT
  2597. * if (res[x] > (2^31)-1) {
  2598. * res[x] = (2^31)-1;
  2599. * OV = 1;
  2600. * } else if (res[x] < -2^31) {
  2601. * res[x] = -2^31;
  2602. * OV = 1;
  2603. * }
  2604. * Rd.W[x] = res[x];
  2605. * for RV32: x=0
  2606. * for RV64: x=1...0
  2607. * ~~~
  2608. *
  2609. * \param [in] t long type of value stored in t
  2610. * \param [in] a unsigned long type of value stored in a
  2611. * \param [in] b unsigned long type of value stored in b
  2612. * \return value stored in long type
  2613. */
  2614. __STATIC_FORCEINLINE long __RV_KMABB(long t, unsigned long a, unsigned long b)
  2615. {
  2616. __ASM volatile("kmabb %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  2617. return t;
  2618. }
  2619. /* ===== Inline Function End for 3.38.1. KMABB ===== */
  2620. /* ===== Inline Function Start for 3.38.2. KMABT ===== */
  2621. /**
  2622. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
  2623. * \brief KMABT (SIMD Saturating Signed Multiply Bottom & Top Halfs & Add)
  2624. * \details
  2625. * **Type**: SIMD
  2626. *
  2627. * **Syntax**:\n
  2628. * ~~~
  2629. * KMABB Rd, Rs1, Rs2
  2630. * KMABT Rd, Rs1, Rs2
  2631. * KMATT Rd, Rs1, Rs2
  2632. * ~~~
  2633. *
  2634. * **Purpose**:\n
  2635. * Multiply the signed 16-bit content of 32-bit elements in a register with the 16-bit content
  2636. * of 32-bit elements in another register and add the result to the content of 32-bit elements in the
  2637. * third register. The addition result may be saturated and is written to the third register.
  2638. * * KMABB: rd.W[x] + bottom*bottom (per 32-bit element)
  2639. * * KMABT: rd.W[x] + bottom*top (per 32-bit element)
  2640. * * KMATT: rd.W[x] + top*top (per 32-bit element)
  2641. *
  2642. * **Description**:\n
  2643. * For the `KMABB` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
  2644. * the bottom 16-bit content of 32-bit elements in Rs2.
  2645. * For the `KMABT` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
  2646. * the top 16-bit content of 32-bit elements in Rs2.
  2647. * For the `KMATT` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
  2648. * top 16-bit content of 32-bit elements in Rs2.
  2649. * The multiplication result is added to the content of 32-bit elements in Rd. If the addition result is
  2650. * beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to
  2651. * 1. The results after saturation are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as
  2652. * signed integers.
  2653. *
  2654. * **Operations**:\n
  2655. * ~~~
  2656. * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]); // KMABB
  2657. * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[1]); // KMABT
  2658. * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]); // KMATT
  2659. * if (res[x] > (2^31)-1) {
  2660. * res[x] = (2^31)-1;
  2661. * OV = 1;
  2662. * } else if (res[x] < -2^31) {
  2663. * res[x] = -2^31;
  2664. * OV = 1;
  2665. * }
  2666. * Rd.W[x] = res[x];
  2667. * for RV32: x=0
  2668. * for RV64: x=1...0
  2669. * ~~~
  2670. *
  2671. * \param [in] t long type of value stored in t
  2672. * \param [in] a unsigned long type of value stored in a
  2673. * \param [in] b unsigned long type of value stored in b
  2674. * \return value stored in long type
  2675. */
  2676. __STATIC_FORCEINLINE long __RV_KMABT(long t, unsigned long a, unsigned long b)
  2677. {
  2678. __ASM volatile("kmabt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  2679. return t;
  2680. }
  2681. /* ===== Inline Function End for 3.38.2. KMABT ===== */
  2682. /* ===== Inline Function Start for 3.38.3. KMATT ===== */
  2683. /**
  2684. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
  2685. * \brief KMATT (SIMD Saturating Signed Multiply Top Halfs & Add)
  2686. * \details
  2687. * **Type**: SIMD
  2688. *
  2689. * **Syntax**:\n
  2690. * ~~~
  2691. * KMABB Rd, Rs1, Rs2
  2692. * KMABT Rd, Rs1, Rs2
  2693. * KMATT Rd, Rs1, Rs2
  2694. * ~~~
  2695. *
  2696. * **Purpose**:\n
  2697. * Multiply the signed 16-bit content of 32-bit elements in a register with the 16-bit content
  2698. * of 32-bit elements in another register and add the result to the content of 32-bit elements in the
  2699. * third register. The addition result may be saturated and is written to the third register.
  2700. * * KMABB: rd.W[x] + bottom*bottom (per 32-bit element)
  2701. * * KMABT: rd.W[x] + bottom*top (per 32-bit element)
  2702. * * KMATT: rd.W[x] + top*top (per 32-bit element)
  2703. *
  2704. * **Description**:\n
  2705. * For the `KMABB` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
  2706. * the bottom 16-bit content of 32-bit elements in Rs2.
  2707. * For the `KMABT` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
  2708. * the top 16-bit content of 32-bit elements in Rs2.
  2709. * For the `KMATT` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
  2710. * top 16-bit content of 32-bit elements in Rs2.
  2711. * The multiplication result is added to the content of 32-bit elements in Rd. If the addition result is
  2712. * beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to
  2713. * 1. The results after saturation are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as
  2714. * signed integers.
  2715. *
  2716. * **Operations**:\n
  2717. * ~~~
  2718. * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]); // KMABB
  2719. * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[1]); // KMABT
  2720. * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]); // KMATT
  2721. * if (res[x] > (2^31)-1) {
  2722. * res[x] = (2^31)-1;
  2723. * OV = 1;
  2724. * } else if (res[x] < -2^31) {
  2725. * res[x] = -2^31;
  2726. * OV = 1;
  2727. * }
  2728. * Rd.W[x] = res[x];
  2729. * for RV32: x=0
  2730. * for RV64: x=1...0
  2731. * ~~~
  2732. *
  2733. * \param [in] t long type of value stored in t
  2734. * \param [in] a unsigned long type of value stored in a
  2735. * \param [in] b unsigned long type of value stored in b
  2736. * \return value stored in long type
  2737. */
  2738. __STATIC_FORCEINLINE long __RV_KMATT(long t, unsigned long a, unsigned long b)
  2739. {
  2740. __ASM volatile("kmatt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  2741. return t;
  2742. }
  2743. /* ===== Inline Function End for 3.38.3. KMATT ===== */
  2744. /* ===== Inline Function Start for 3.39.1. KMADA ===== */
  2745. /**
  2746. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
  2747. * \brief KMADA (SIMD Saturating Signed Multiply Two Halfs and Two Adds)
  2748. * \details
  2749. * **Type**: SIMD
  2750. *
  2751. * **Syntax**:\n
  2752. * ~~~
  2753. * KMADA Rd, Rs1, Rs2
  2754. * KMAXDA Rd, Rs1, Rs2
  2755. * ~~~
  2756. *
  2757. * **Purpose**:\n
  2758. * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then adds
  2759. * the two 32-bit results and 32-bit elements in a third register together. The addition result may be
  2760. * saturated.
  2761. * * KMADA: rd.W[x] + top*top + bottom*bottom (per 32-bit element)
  2762. * * KMAXDA: rd.W[x] + top*bottom + bottom*top (per 32-bit element)
  2763. *
  2764. * **Description**:\n
  * For the `KMADA` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
  2766. * the bottom 16-bit content of 32-bit elements in Rs2 and then adds the result to the result of
  2767. * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit
  2768. * elements in Rs2.
  2769. * For the `KMAXDA` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
  2770. * bottom 16-bit content of 32-bit elements in Rs2 and then adds the result to the result of multiplying
  2771. * the bottom 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit elements in
  2772. * Rs2.
  2773. * The result is added to the content of 32-bit elements in Rd. If the addition result is beyond the Q31
  2774. * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The 32-bit
  2775. * results after saturation are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed
  2776. * integers.
  2777. *
  2778. * **Operations**:\n
  2779. * ~~~
  2780. * // KMADA
  2781. * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]) + (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
  2782. * // KMAXDA
  2783. * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[0]) + (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
  2784. * if (res[x] > (2^31)-1) {
  2785. * res[x] = (2^31)-1;
  2786. * OV = 1;
  2787. * } else if (res[x] < -2^31) {
  2788. * res[x] = -2^31;
  2789. * OV = 1;
  2790. * }
  2791. * Rd.W[x] = res[x];
  2792. * for RV32: x=0
  2793. * for RV64: x=1...0
  2794. * ~~~
  2795. *
  2796. * \param [in] t long type of value stored in t
  2797. * \param [in] a unsigned long type of value stored in a
  2798. * \param [in] b unsigned long type of value stored in b
  2799. * \return value stored in long type
  2800. */
  2801. __STATIC_FORCEINLINE long __RV_KMADA(long t, unsigned long a, unsigned long b)
  2802. {
  2803. __ASM volatile("kmada %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  2804. return t;
  2805. }
  2806. /* ===== Inline Function End for 3.39.1. KMADA ===== */
  2807. /* ===== Inline Function Start for 3.39.2. KMAXDA ===== */
  2808. /**
  2809. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
  2810. * \brief KMAXDA (SIMD Saturating Signed Crossed Multiply Two Halfs and Two Adds)
  2811. * \details
  2812. * **Type**: SIMD
  2813. *
  2814. * **Syntax**:\n
  2815. * ~~~
  2816. * KMADA Rd, Rs1, Rs2
  2817. * KMAXDA Rd, Rs1, Rs2
  2818. * ~~~
  2819. *
  2820. * **Purpose**:\n
  2821. * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then adds
  2822. * the two 32-bit results and 32-bit elements in a third register together. The addition result may be
  2823. * saturated.
  2824. * * KMADA: rd.W[x] + top*top + bottom*bottom (per 32-bit element)
  2825. * * KMAXDA: rd.W[x] + top*bottom + bottom*top (per 32-bit element)
  2826. *
  2827. * **Description**:\n
  * For the `KMADA` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
  2829. * the bottom 16-bit content of 32-bit elements in Rs2 and then adds the result to the result of
  2830. * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit
  2831. * elements in Rs2.
  2832. * For the `KMAXDA` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
  2833. * bottom 16-bit content of 32-bit elements in Rs2 and then adds the result to the result of multiplying
  2834. * the bottom 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit elements in
  2835. * Rs2.
  2836. * The result is added to the content of 32-bit elements in Rd. If the addition result is beyond the Q31
  2837. * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The 32-bit
  2838. * results after saturation are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed
  2839. * integers.
  2840. *
  2841. * **Operations**:\n
  2842. * ~~~
  2843. * // KMADA
  2844. * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]) + (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
  2845. * // KMAXDA
  2846. * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[0]) + (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
  2847. * if (res[x] > (2^31)-1) {
  2848. * res[x] = (2^31)-1;
  2849. * OV = 1;
  2850. * } else if (res[x] < -2^31) {
  2851. * res[x] = -2^31;
  2852. * OV = 1;
  2853. * }
  2854. * Rd.W[x] = res[x];
  2855. * for RV32: x=0
  2856. * for RV64: x=1...0
  2857. * ~~~
  2858. *
  2859. * \param [in] t long type of value stored in t
  2860. * \param [in] a unsigned long type of value stored in a
  2861. * \param [in] b unsigned long type of value stored in b
  2862. * \return value stored in long type
  2863. */
  2864. __STATIC_FORCEINLINE long __RV_KMAXDA(long t, unsigned long a, unsigned long b)
  2865. {
  2866. __ASM volatile("kmaxda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  2867. return t;
  2868. }
  2869. /* ===== Inline Function End for 3.39.2. KMAXDA ===== */
  2870. /* ===== Inline Function Start for 3.40.1. KMADS ===== */
  2871. /**
  2872. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
  2873. * \brief KMADS (SIMD Saturating Signed Multiply Two Halfs & Subtract & Add)
  2874. * \details
  2875. * **Type**: SIMD
  2876. *
  2877. * **Syntax**:\n
  2878. * ~~~
  2879. * KMADS Rd, Rs1, Rs2
  2880. * KMADRS Rd, Rs1, Rs2
  2881. * KMAXDS Rd, Rs1, Rs2
  2882. * ~~~
  2883. *
  2884. * **Purpose**:\n
  2885. * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then
  2886. * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to
  2887. * the corresponding 32-bit elements in a third register. The addition result may be saturated.
  2888. * * KMADS: rd.W[x] + (top*top - bottom*bottom) (per 32-bit element)
  2889. * * KMADRS: rd.W[x] + (bottom*bottom - top*top) (per 32-bit element)
  2890. * * KMAXDS: rd.W[x] + (top*bottom - bottom*top) (per 32-bit element)
  2891. *
  2892. * **Description**:\n
  2893. * For the `KMADS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
  2894. * the bottom 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
  2895. * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit
  2896. * elements in Rs2.
  2897. * For the `KMADRS` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
  2898. * top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
  2899. * multiplying the bottom 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-
  2900. * bit elements in Rs2.
  2901. * For the `KMAXDS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
  2902. * the top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
  2903. * multiplying the top 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-bit
  2904. * elements in Rs2.
  2905. * The subtraction result is then added to the content of the corresponding 32-bit elements in Rd. If the
  2906. * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and
  2907. * the OV bit is set to 1. The 32-bit results after saturation are written to Rd. The 16-bit contents of Rs1
  2908. * and Rs2 are treated as signed integers.
  2909. *
  2910. * **Operations**:\n
  2911. * ~~~
  2912. * // KMADS
  2913. * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
  2914. * // KMADRS
  2915. * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]);
  2916. * // KMAXDS
  2917. * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
  2918. * if (res[x] > (2^31)-1) {
  2919. * res[x] = (2^31)-1;
  2920. * OV = 1;
  2921. * } else if (res[x] < -2^31) {
  2922. * res[x] = -2^31;
  2923. * OV = 1;
  2924. * }
  2925. * Rd.W[x] = res[x];
  2926. * for RV32: x=0
  2927. * for RV64: x=1...0
  2928. * ~~~
  2929. *
  2930. * \param [in] t long type of value stored in t
  2931. * \param [in] a unsigned long type of value stored in a
  2932. * \param [in] b unsigned long type of value stored in b
  2933. * \return value stored in long type
  2934. */
  2935. __STATIC_FORCEINLINE long __RV_KMADS(long t, unsigned long a, unsigned long b)
  2936. {
  2937. __ASM volatile("kmads %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  2938. return t;
  2939. }
  2940. /* ===== Inline Function End for 3.40.1. KMADS ===== */
  2941. /* ===== Inline Function Start for 3.40.2. KMADRS ===== */
  2942. /**
  2943. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
  2944. * \brief KMADRS (SIMD Saturating Signed Multiply Two Halfs & Reverse Subtract & Add)
  2945. * \details
  2946. * **Type**: SIMD
  2947. *
  2948. * **Syntax**:\n
  2949. * ~~~
  2950. * KMADS Rd, Rs1, Rs2
  2951. * KMADRS Rd, Rs1, Rs2
  2952. * KMAXDS Rd, Rs1, Rs2
  2953. * ~~~
  2954. *
  2955. * **Purpose**:\n
  2956. * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then
  2957. * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to
  2958. * the corresponding 32-bit elements in a third register. The addition result may be saturated.
  2959. * * KMADS: rd.W[x] + (top*top - bottom*bottom) (per 32-bit element)
  2960. * * KMADRS: rd.W[x] + (bottom*bottom - top*top) (per 32-bit element)
  2961. * * KMAXDS: rd.W[x] + (top*bottom - bottom*top) (per 32-bit element)
  2962. *
  2963. * **Description**:\n
  2964. * For the `KMADS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
  2965. * the bottom 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
  2966. * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit
  2967. * elements in Rs2.
  2968. * For the `KMADRS` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
  2969. * top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
  2970. * multiplying the bottom 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-
  2971. * bit elements in Rs2.
  2972. * For the `KMAXDS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
  2973. * the top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
  2974. * multiplying the top 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-bit
  2975. * elements in Rs2.
  2976. * The subtraction result is then added to the content of the corresponding 32-bit elements in Rd. If the
  2977. * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and
  2978. * the OV bit is set to 1. The 32-bit results after saturation are written to Rd. The 16-bit contents of Rs1
  2979. * and Rs2 are treated as signed integers.
  2980. *
  2981. * **Operations**:\n
  2982. * ~~~
  2983. * // KMADS
  2984. * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
  2985. * // KMADRS
  2986. * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]);
  2987. * // KMAXDS
  2988. * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
  2989. * if (res[x] > (2^31)-1) {
  2990. * res[x] = (2^31)-1;
  2991. * OV = 1;
  2992. * } else if (res[x] < -2^31) {
  2993. * res[x] = -2^31;
  2994. * OV = 1;
  2995. * }
  2996. * Rd.W[x] = res[x];
  2997. * for RV32: x=0
  2998. * for RV64: x=1...0
  2999. * ~~~
  3000. *
  3001. * \param [in] t long type of value stored in t
  3002. * \param [in] a unsigned long type of value stored in a
  3003. * \param [in] b unsigned long type of value stored in b
  3004. * \return value stored in long type
  3005. */
  3006. __STATIC_FORCEINLINE long __RV_KMADRS(long t, unsigned long a, unsigned long b)
  3007. {
  3008. __ASM volatile("kmadrs %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  3009. return t;
  3010. }
  3011. /* ===== Inline Function End for 3.40.2. KMADRS ===== */
  3012. /* ===== Inline Function Start for 3.40.3. KMAXDS ===== */
  3013. /**
  3014. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
  3015. * \brief KMAXDS (SIMD Saturating Signed Crossed Multiply Two Halfs & Subtract & Add)
  3016. * \details
  3017. * **Type**: SIMD
  3018. *
  3019. * **Syntax**:\n
  3020. * ~~~
  3021. * KMADS Rd, Rs1, Rs2
  3022. * KMADRS Rd, Rs1, Rs2
  3023. * KMAXDS Rd, Rs1, Rs2
  3024. * ~~~
  3025. *
  3026. * **Purpose**:\n
  3027. * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then
  3028. * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to
  3029. * the corresponding 32-bit elements in a third register. The addition result may be saturated.
  3030. * * KMADS: rd.W[x] + (top*top - bottom*bottom) (per 32-bit element)
  3031. * * KMADRS: rd.W[x] + (bottom*bottom - top*top) (per 32-bit element)
  3032. * * KMAXDS: rd.W[x] + (top*bottom - bottom*top) (per 32-bit element)
  3033. *
  3034. * **Description**:\n
  3035. * For the `KMADS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
  3036. * the bottom 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
  3037. * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit
  3038. * elements in Rs2.
  3039. * For the `KMADRS` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
  3040. * top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
  3041. * multiplying the bottom 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-
  3042. * bit elements in Rs2.
  3043. * For the `KMAXDS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
  3044. * the top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
  3045. * multiplying the top 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-bit
  3046. * elements in Rs2.
  3047. * The subtraction result is then added to the content of the corresponding 32-bit elements in Rd. If the
  3048. * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and
  3049. * the OV bit is set to 1. The 32-bit results after saturation are written to Rd. The 16-bit contents of Rs1
  3050. * and Rs2 are treated as signed integers.
  3051. *
  3052. * **Operations**:\n
  3053. * ~~~
  3054. * // KMADS
  3055. * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
  3056. * // KMADRS
  3057. * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]);
  3058. * // KMAXDS
  3059. * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
  3060. * if (res[x] > (2^31)-1) {
  3061. * res[x] = (2^31)-1;
  3062. * OV = 1;
  3063. * } else if (res[x] < -2^31) {
  3064. * res[x] = -2^31;
  3065. * OV = 1;
  3066. * }
  3067. * Rd.W[x] = res[x];
  3068. * for RV32: x=0
  3069. * for RV64: x=1...0
  3070. * ~~~
  3071. *
  3072. * \param [in] t long type of value stored in t
  3073. * \param [in] a unsigned long type of value stored in a
  3074. * \param [in] b unsigned long type of value stored in b
  3075. * \return value stored in long type
  3076. */
  3077. __STATIC_FORCEINLINE long __RV_KMAXDS(long t, unsigned long a, unsigned long b)
  3078. {
  3079. __ASM volatile("kmaxds %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  3080. return t;
  3081. }
  3082. /* ===== Inline Function End for 3.40.3. KMAXDS ===== */
  3083. /* ===== Inline Function Start for 3.41. KMAR64 ===== */
  3084. /**
  3085. * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
  3086. * \brief KMAR64 (Signed Multiply and Saturating Add to 64-Bit Data)
  3087. * \details
  3088. * **Type**: DSP (64-bit Profile)
  3089. *
  3090. * **Syntax**:\n
  3091. * ~~~
  3092. * KMAR64 Rd, Rs1, Rs2
  3093. * ~~~
  3094. *
  3095. * **Purpose**:\n
  3096. * Multiply the 32-bit signed elements in two registers and add the 64-bit multiplication
  3097. * results to the 64-bit signed data of a pair of registers (RV32) or a register (RV64). The result is
  3098. * saturated to the Q63 range and written back to the pair of registers (RV32) or the register (RV64).
  3099. *
  3100. * **RV32 Description**:\n
  3101. * This instruction multiplies the 32-bit signed data of Rs1 with that of Rs2. It adds
  3102. * the 64-bit multiplication result to the 64-bit signed data of an even/odd pair of registers specified by
  3103. * Rd(4,1) with unlimited precision. If the 64-bit addition result is beyond the Q63 number range (-2^63 <=
  3104. * Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The saturated result is written back
  3105. * to the even/odd pair of registers specified by Rd(4,1).
  3106. * Rx(4,1), i.e., value d, determines the even/odd pair group of two registers. Specifically, the register
  3107. * pair includes register 2d and 2d+1.
  3108. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
  3109. * of the pair contains the low 32-bit of the result.
  3110. *
  3111. * **RV64 Description**:\n
  3112. * This instruction multiplies the 32-bit signed elements of Rs1 with that of Rs2. It
  3113. * adds the 64-bit multiplication results to the 64-bit signed data of Rd with unlimited precision. If the
  3114. * 64-bit addition result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range
  3115. * and the OV bit is set to 1. The saturated result is written back to Rd.
  3116. *
  3117. * **Operations**:\n
  3118. * ~~~
  3119. * RV32:
  3120. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  3121. * result = R[t_H].R[t_L] + (Rs1 * Rs2);
  3122. * if (result > (2^63)-1) {
  3123. * result = (2^63)-1; OV = 1;
  3124. * } else if (result < -2^63) {
  3125. * result = -2^63; OV = 1;
  3126. * }
  3127. * R[t_H].R[t_L] = result;
  3128. * RV64:
  3129. * // `result` has unlimited precision
  3130. * result = Rd + (Rs1.W[0] * Rs2.W[0]) + (Rs1.W[1] * Rs2.W[1]);
  3131. * if (result > (2^63)-1) {
  3132. * result = (2^63)-1; OV = 1;
  3133. * } else if (result < -2^63) {
  3134. * result = -2^63; OV = 1;
  3135. * }
  3136. * Rd = result;
  3137. * ~~~
  3138. *
  3139. * \param [in] t long long type of value stored in t
  3140. * \param [in] a long type of value stored in a
  3141. * \param [in] b long type of value stored in b
  3142. * \return value stored in long long type
  3143. */
  3144. __STATIC_FORCEINLINE long long __RV_KMAR64(long long t, long a, long b)
  3145. {
  3146. __ASM volatile("kmar64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  3147. return t;
  3148. }
  3149. /* ===== Inline Function End for 3.41. KMAR64 ===== */
  3150. /* ===== Inline Function Start for 3.42.1. KMDA ===== */
  3151. /**
  3152. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
  3153. * \brief KMDA (SIMD Signed Multiply Two Halfs and Add)
  3154. * \details
  3155. * **Type**: SIMD
  3156. *
  3157. * **Syntax**:\n
  3158. * ~~~
  3159. * KMDA Rd, Rs1, Rs2
  3160. * KMXDA Rd, Rs1, Rs2
  3161. * ~~~
  3162. *
  3163. * **Purpose**:\n
  3164. * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
  3165. * adds the two 32-bit results together. The addition result may be saturated.
  3166. * * KMDA: top*top + bottom*bottom (per 32-bit element)
  3167. * * KMXDA: top*bottom + bottom*top (per 32-bit element)
  3168. *
  3169. * **Description**:\n
  3170. * For the `KMDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  3171. * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
  3172. * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-
  3173. * bit elements of Rs2.
  3174. * For the `KMXDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  3175. * with the top 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
  3176. * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of the
  3177. * 32-bit elements of Rs2.
  3178. * The addition result is checked for saturation. If saturation happens, the result is saturated to 2^31-1.
  3179. * The final results are written to Rd. The 16-bit contents are treated as signed integers.
  3180. *
  3181. * **Operations**:\n
  3182. * ~~~
  * if ((Rs1.W[x] != 0x80008000) or (Rs2.W[x] != 0x80008000)) {
  *   // KMDA
  *   Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[1]) + (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
  *   // KMXDA
  *   Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) + (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
  * } else {
  *   Rd.W[x] = 0x7fffffff;
  *   OV = 1;
  * }
  * for RV32: x=0
  * for RV64: x=1...0
  3187. * ~~~
  3188. *
  3189. * \param [in] a unsigned long type of value stored in a
  3190. * \param [in] b unsigned long type of value stored in b
  3191. * \return value stored in long type
  3192. */
  3193. __STATIC_FORCEINLINE long __RV_KMDA(unsigned long a, unsigned long b)
  3194. {
  3195. long result;
  3196. __ASM volatile("kmda %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  3197. return result;
  3198. }
  3199. /* ===== Inline Function End for 3.42.1. KMDA ===== */
  3200. /* ===== Inline Function Start for 3.42.2. KMXDA ===== */
  3201. /**
  3202. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
  3203. * \brief KMXDA (SIMD Signed Crossed Multiply Two Halfs and Add)
  3204. * \details
  3205. * **Type**: SIMD
  3206. *
  3207. * **Syntax**:\n
  3208. * ~~~
  3209. * KMDA Rd, Rs1, Rs2
  3210. * KMXDA Rd, Rs1, Rs2
  3211. * ~~~
  3212. *
  3213. * **Purpose**:\n
  3214. * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
  3215. * adds the two 32-bit results together. The addition result may be saturated.
  3216. * * KMDA: top*top + bottom*bottom (per 32-bit element)
  3217. * * KMXDA: top*bottom + bottom*top (per 32-bit element)
  3218. *
  3219. * **Description**:\n
  3220. * For the `KMDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  3221. * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
  3222. * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-
  3223. * bit elements of Rs2.
  3224. * For the `KMXDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  3225. * with the top 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
  3226. * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of the
  3227. * 32-bit elements of Rs2.
  3228. * The addition result is checked for saturation. If saturation happens, the result is saturated to 2^31-1.
  3229. * The final results are written to Rd. The 16-bit contents are treated as signed integers.
  3230. *
  3231. * **Operations**:\n
  3232. * ~~~
  * if ((Rs1.W[x] != 0x80008000) or (Rs2.W[x] != 0x80008000)) {
  *   // KMDA
  *   Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[1]) + (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
  *   // KMXDA
  *   Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) + (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
  * } else {
  *   Rd.W[x] = 0x7fffffff;
  *   OV = 1;
  * }
  * for RV32: x=0
  * for RV64: x=1...0
  3237. * ~~~
  3238. *
  3239. * \param [in] a unsigned long type of value stored in a
  3240. * \param [in] b unsigned long type of value stored in b
  3241. * \return value stored in long type
  3242. */
  3243. __STATIC_FORCEINLINE long __RV_KMXDA(unsigned long a, unsigned long b)
  3244. {
  3245. long result;
  3246. __ASM volatile("kmxda %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  3247. return result;
  3248. }
  3249. /* ===== Inline Function End for 3.42.2. KMXDA ===== */
  3250. /* ===== Inline Function Start for 3.43.1. KMMAC ===== */
  3251. /**
  3252. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
  3253. * \brief KMMAC (SIMD Saturating MSW Signed Multiply Word and Add)
  3254. * \details
  3255. * **Type**: SIMD
  3256. *
  3257. * **Syntax**:\n
  3258. * ~~~
  3259. * KMMAC Rd, Rs1, Rs2
  3260. * KMMAC.u Rd, Rs1, Rs2
  3261. * ~~~
  3262. *
  3263. * **Purpose**:\n
  3264. * Multiply the signed 32-bit integer elements of two registers and add the most significant
  3265. * 32-bit results with the signed 32-bit integer elements of a third register. The addition results are
  3266. * saturated first and then written back to the third register. The `.u` form performs an additional
  3267. * rounding up operation on the multiplication results before adding the most significant 32-bit part
  3268. * of the results.
  3269. *
  3270. * **Description**:\n
  3271. * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2
  3272. * and adds the most significant 32-bit multiplication results with the signed 32-bit elements of Rd. If
  3273. * the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range
  3274. * and the OV bit is set to 1. The results after saturation are written to Rd. The `.u` form of the
  3275. * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by
  3276. * adding a 1 to bit 31 of the results.
  3277. *
  3278. * **Operations**:\n
  3279. * ~~~
  3280. * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
  3281. * if (`.u` form) {
  3282. * Round[x][32:0] = Mres[x][63:31] + 1;
  3283. * res[x] = Rd.W[x] + Round[x][32:1];
  3284. * } else {
  3285. * res[x] = Rd.W[x] + Mres[x][63:32];
  3286. * }
  3287. * if (res[x] > (2^31)-1) {
  3288. * res[x] = (2^31)-1;
  3289. * OV = 1;
  3290. * } else if (res[x] < -2^31) {
  3291. * res[x] = -2^31;
  3292. * OV = 1;
  3293. * }
  3294. * Rd.W[x] = res[x];
  3295. * for RV32: x=0
  3296. * for RV64: x=1...0
  3297. * ~~~
  3298. *
  3299. * \param [in] t long type of value stored in t
  3300. * \param [in] a long type of value stored in a
  3301. * \param [in] b long type of value stored in b
  3302. * \return value stored in long type
  3303. */
  3304. __STATIC_FORCEINLINE long __RV_KMMAC(long t, long a, long b)
  3305. {
  3306. __ASM volatile("kmmac %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  3307. return t;
  3308. }
  3309. /* ===== Inline Function End for 3.43.1. KMMAC ===== */
  3310. /* ===== Inline Function Start for 3.43.2. KMMAC.u ===== */
  3311. /**
  3312. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
  3313. * \brief KMMAC.u (SIMD Saturating MSW Signed Multiply Word and Add with Rounding)
  3314. * \details
  3315. * **Type**: SIMD
  3316. *
  3317. * **Syntax**:\n
  3318. * ~~~
  3319. * KMMAC Rd, Rs1, Rs2
  3320. * KMMAC.u Rd, Rs1, Rs2
  3321. * ~~~
  3322. *
  3323. * **Purpose**:\n
  3324. * Multiply the signed 32-bit integer elements of two registers and add the most significant
  3325. * 32-bit results with the signed 32-bit integer elements of a third register. The addition results are
  3326. * saturated first and then written back to the third register. The `.u` form performs an additional
  3327. * rounding up operation on the multiplication results before adding the most significant 32-bit part
  3328. * of the results.
  3329. *
  3330. * **Description**:\n
  3331. * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2
  3332. * and adds the most significant 32-bit multiplication results with the signed 32-bit elements of Rd. If
  3333. * the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range
  3334. * and the OV bit is set to 1. The results after saturation are written to Rd. The `.u` form of the
  3335. * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by
  3336. * adding a 1 to bit 31 of the results.
  3337. *
  3338. * **Operations**:\n
  3339. * ~~~
  3340. * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
  3341. * if (`.u` form) {
  3342. * Round[x][32:0] = Mres[x][63:31] + 1;
  3343. * res[x] = Rd.W[x] + Round[x][32:1];
  3344. * } else {
  3345. * res[x] = Rd.W[x] + Mres[x][63:32];
  3346. * }
  3347. * if (res[x] > (2^31)-1) {
  3348. * res[x] = (2^31)-1;
  3349. * OV = 1;
  3350. * } else if (res[x] < -2^31) {
  3351. * res[x] = -2^31;
  3352. * OV = 1;
  3353. * }
  3354. * Rd.W[x] = res[x];
  3355. * for RV32: x=0
  3356. * for RV64: x=1...0
  3357. * ~~~
  3358. *
  3359. * \param [in] t long type of value stored in t
  3360. * \param [in] a long type of value stored in a
  3361. * \param [in] b long type of value stored in b
  3362. * \return value stored in long type
  3363. */
  3364. __STATIC_FORCEINLINE long __RV_KMMAC_U(long t, long a, long b)
  3365. {
  3366. __ASM volatile("kmmac.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  3367. return t;
  3368. }
  3369. /* ===== Inline Function End for 3.43.2. KMMAC.u ===== */
  3370. /* ===== Inline Function Start for 3.44.1. KMMAWB ===== */
  3371. /**
  3372. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
  3373. * \brief KMMAWB (SIMD Saturating MSW Signed Multiply Word and Bottom Half and Add)
  3374. * \details
  3375. * **Type**: SIMD
  3376. *
  3377. * **Syntax**:\n
  3378. * ~~~
  3379. * KMMAWB Rd, Rs1, Rs2
  3380. * KMMAWB.u Rd, Rs1, Rs2
  3381. * ~~~
  3382. *
  3383. * **Purpose**:\n
  3384. * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the
  3385. * corresponding 32-bit elements of another register and add the most significant 32-bit results with
  3386. * the corresponding signed 32-bit elements of a third register. The addition result is written to the
  3387. * corresponding 32-bit elements of the third register. The `.u` form rounds up the multiplication
  3388. * results from the most significant discarded bit before the addition operations.
  3389. *
  3390. * **Description**:\n
  3391. * This instruction multiplies the signed 32-bit elements of Rs1 with the signed bottom 16-bit content
  3392. * of the corresponding 32-bit elements of Rs2 and adds the most significant 32-bit multiplication
  3393. * results with the corresponding signed 32-bit elements of Rd. If the addition result is beyond the Q31
  3394. * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The results
  3395. * after saturation are written to the corresponding 32-bit elements of Rd. The `.u` form of the
  3396. * instruction rounds up the most significant 32-bit of the 48-bit multiplication results by adding a 1 to
  3397. * bit 15 of the result before the addition operations.
  3398. *
  3399. * **Operations**:\n
  3400. * ~~~
  3401. * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[0];
  3402. * if (`.u` form) {
  3403. * Round[x][32:0] = Mres[x][47:15] + 1;
  3404. * res[x] = Rd.W[x] + Round[x][32:1];
  3405. * } else {
  3406. * res[x] = Rd.W[x] + Mres[x][47:16];
  3407. * }
  3408. * if (res[x] > (2^31)-1) {
  3409. * res[x] = (2^31)-1;
  3410. * OV = 1;
  3411. * } else if (res[x] < -2^31) {
  3412. * res[x] = -2^31;
  3413. * OV = 1;
  3414. * }
  3415. * Rd.W[x] = res[x];
  3416. * for RV32: x=0
  3417. * for RV64: x=1...0
  3418. * ~~~
  3419. *
  3420. * \param [in] t long type of value stored in t
  3421. * \param [in] a unsigned long type of value stored in a
  3422. * \param [in] b unsigned long type of value stored in b
  3423. * \return value stored in long type
  3424. */
  3425. __STATIC_FORCEINLINE long __RV_KMMAWB(long t, unsigned long a, unsigned long b)
  3426. {
  3427. __ASM volatile("kmmawb %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  3428. return t;
  3429. }
  3430. /* ===== Inline Function End for 3.44.1. KMMAWB ===== */
  3431. /* ===== Inline Function Start for 3.44.2. KMMAWB.u ===== */
  3432. /**
  3433. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
  3434. * \brief KMMAWB.u (SIMD Saturating MSW Signed Multiply Word and Bottom Half and Add with Rounding)
  3435. * \details
  3436. * **Type**: SIMD
  3437. *
  3438. * **Syntax**:\n
  3439. * ~~~
  3440. * KMMAWB Rd, Rs1, Rs2
  3441. * KMMAWB.u Rd, Rs1, Rs2
  3442. * ~~~
  3443. *
  3444. * **Purpose**:\n
  3445. * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the
  3446. * corresponding 32-bit elements of another register and add the most significant 32-bit results with
  3447. * the corresponding signed 32-bit elements of a third register. The addition result is written to the
  3448. * corresponding 32-bit elements of the third register. The `.u` form rounds up the multiplication
  3449. * results from the most significant discarded bit before the addition operations.
  3450. *
  3451. * **Description**:\n
  3452. * This instruction multiplies the signed 32-bit elements of Rs1 with the signed bottom 16-bit content
  3453. * of the corresponding 32-bit elements of Rs2 and adds the most significant 32-bit multiplication
  3454. * results with the corresponding signed 32-bit elements of Rd. If the addition result is beyond the Q31
  3455. * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The results
  3456. * after saturation are written to the corresponding 32-bit elements of Rd. The `.u` form of the
  3457. * instruction rounds up the most significant 32-bit of the 48-bit multiplication results by adding a 1 to
  3458. * bit 15 of the result before the addition operations.
  3459. *
  3460. * **Operations**:\n
  3461. * ~~~
  3462. * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[0];
  3463. * if (`.u` form) {
  3464. * Round[x][32:0] = Mres[x][47:15] + 1;
  3465. * res[x] = Rd.W[x] + Round[x][32:1];
  3466. * } else {
  3467. * res[x] = Rd.W[x] + Mres[x][47:16];
  3468. * }
  3469. * if (res[x] > (2^31)-1) {
  3470. * res[x] = (2^31)-1;
  3471. * OV = 1;
  3472. * } else if (res[x] < -2^31) {
  3473. * res[x] = -2^31;
  3474. * OV = 1;
  3475. * }
  3476. * Rd.W[x] = res[x];
  3477. * for RV32: x=0
  3478. * for RV64: x=1...0
  3479. * ~~~
  3480. *
  3481. * \param [in] t long type of value stored in t
  3482. * \param [in] a unsigned long type of value stored in a
  3483. * \param [in] b unsigned long type of value stored in b
  3484. * \return value stored in long type
  3485. */
  3486. __STATIC_FORCEINLINE long __RV_KMMAWB_U(long t, unsigned long a, unsigned long b)
  3487. {
  3488. __ASM volatile("kmmawb.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  3489. return t;
  3490. }
  3491. /* ===== Inline Function End for 3.44.2. KMMAWB.u ===== */
  3492. /* ===== Inline Function Start for 3.45.1. KMMAWB2 ===== */
  3493. /**
  3494. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
  3495. * \brief KMMAWB2 (SIMD Saturating MSW Signed Multiply Word and Bottom Half & 2 and Add)
  3496. * \details
  3497. * **Type**: SIMD
  3498. *
  3499. * **Syntax**:\n
  3500. * ~~~
  3501. * KMMAWB2 Rd, Rs1, Rs2
  3502. * KMMAWB2.u Rd, Rs1, Rs2
  3503. * ~~~
  3504. *
  3505. * **Purpose**:\n
  3506. * Multiply the signed 32-bit elements of one register and the bottom 16-bit of the
  3507. * corresponding 32-bit elements of another register, double the multiplication results and add the
  3508. * saturated most significant 32-bit results with the corresponding signed 32-bit elements of a third
  3509. * register. The saturated addition result is written to the corresponding 32-bit elements of the third
  3510. * register. The `.u` form rounds up the multiplication results from the most significant discarded bit
  3511. * before the addition operations.
  3512. *
  3513. * **Description**:\n
  3514. * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed bottom 16-bit Q15
  3515. * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
  3516. * adds the saturated most significant 32-bit Q31 multiplication results with the corresponding signed
  3517. * 32-bit elements of Rd. If the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is
  3518. * saturated to the range and the OV bit is set to 1. The results after saturation are written to the
  3519. * corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the most significant
  3520. * 32-bit of the 48-bit Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of
  3521. * the result before the addition operations.
  3522. *
  3523. * **Operations**:\n
  3524. * ~~~
  3525. * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[0] == 0x8000)) {
  3526. * addop.W[x] = 0x7fffffff;
  3527. * OV = 1;
  3528. * } else {
  3529. * Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[0];
  3530. * if (`.u` form) {
  3531. * Mres[x][47:14] = Mres[x][47:14] + 1;
  3532. * }
  3533. * addop.W[x] = Mres[x][46:15]; // doubling
  3534. * }
  3535. * res[x] = Rd.W[x] + addop.W[x];
  3536. * if (res[x] > (2^31)-1) {
  3537. * res[x] = (2^31)-1;
  3538. * OV = 1;
  3539. * } else if (res[x] < -2^31) {
  3540. * res[x] = -2^31;
  3541. * OV = 1;
  3542. * }
  3543. * Rd.W[x] = res[x];
  3544. * for RV32: x=0
  3545. * for RV64: x=1...0
  3546. * ~~~
  3547. *
  3548. * \param [in] t long type of value stored in t
  3549. * \param [in] a unsigned long type of value stored in a
  3550. * \param [in] b unsigned long type of value stored in b
  3551. * \return value stored in long type
  3552. */
  3553. __STATIC_FORCEINLINE long __RV_KMMAWB2(long t, unsigned long a, unsigned long b)
  3554. {
  3555. __ASM volatile("kmmawb2 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  3556. return t;
  3557. }
  3558. /* ===== Inline Function End for 3.45.1. KMMAWB2 ===== */
  3559. /* ===== Inline Function Start for 3.45.2. KMMAWB2.u ===== */
  3560. /**
  3561. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
  3562. * \brief KMMAWB2.u (SIMD Saturating MSW Signed Multiply Word and Bottom Half & 2 and Add with Rounding)
  3563. * \details
  3564. * **Type**: SIMD
  3565. *
  3566. * **Syntax**:\n
  3567. * ~~~
  3568. * KMMAWB2 Rd, Rs1, Rs2
  3569. * KMMAWB2.u Rd, Rs1, Rs2
  3570. * ~~~
  3571. *
  3572. * **Purpose**:\n
  3573. * Multiply the signed 32-bit elements of one register and the bottom 16-bit of the
  3574. * corresponding 32-bit elements of another register, double the multiplication results and add the
  3575. * saturated most significant 32-bit results with the corresponding signed 32-bit elements of a third
  3576. * register. The saturated addition result is written to the corresponding 32-bit elements of the third
  3577. * register. The `.u` form rounds up the multiplication results from the most significant discarded bit
  3578. * before the addition operations.
  3579. *
  3580. * **Description**:\n
  3581. * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed bottom 16-bit Q15
  3582. * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
  3583. * adds the saturated most significant 32-bit Q31 multiplication results with the corresponding signed
  3584. * 32-bit elements of Rd. If the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is
  3585. * saturated to the range and the OV bit is set to 1. The results after saturation are written to the
  3586. * corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the most significant
  3587. * 32-bit of the 48-bit Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of
  3588. * the result before the addition operations.
  3589. *
  3590. * **Operations**:\n
  3591. * ~~~
  3592. * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[0] == 0x8000)) {
  3593. * addop.W[x] = 0x7fffffff;
  3594. * OV = 1;
  3595. * } else {
  3596. * Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[0];
  3597. * if (`.u` form) {
  3598. * Mres[x][47:14] = Mres[x][47:14] + 1;
  3599. * }
  3600. * addop.W[x] = Mres[x][46:15]; // doubling
  3601. * }
  3602. * res[x] = Rd.W[x] + addop.W[x];
  3603. * if (res[x] > (2^31)-1) {
  3604. * res[x] = (2^31)-1;
  3605. * OV = 1;
  3606. * } else if (res[x] < -2^31) {
  3607. * res[x] = -2^31;
  3608. * OV = 1;
  3609. * }
  3610. * Rd.W[x] = res[x];
  3611. * for RV32: x=0
  3612. * for RV64: x=1...0
  3613. * ~~~
  3614. *
  3615. * \param [in] t long type of value stored in t
  3616. * \param [in] a unsigned long type of value stored in a
  3617. * \param [in] b unsigned long type of value stored in b
  3618. * \return value stored in long type
  3619. */
  3620. __STATIC_FORCEINLINE long __RV_KMMAWB2_U(long t, unsigned long a, unsigned long b)
  3621. {
  3622. __ASM volatile("kmmawb2.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  3623. return t;
  3624. }
  3625. /* ===== Inline Function End for 3.45.2. KMMAWB2.u ===== */
  3626. /* ===== Inline Function Start for 3.46.1. KMMAWT ===== */
  3627. /**
  3628. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
  3629. * \brief KMMAWT (SIMD Saturating MSW Signed Multiply Word and Top Half and Add)
  3630. * \details
  3631. * **Type**: SIMD
  3632. *
  3633. * **Syntax**:\n
  3634. * ~~~
  3635. * KMMAWT Rd, Rs1, Rs2
  3636. * KMMAWT.u Rd, Rs1, Rs2
  3637. * ~~~
  3638. *
  3639. * **Purpose**:\n
  3640. * Multiply the signed 32-bit integer elements of one register and the signed top 16-bit of the
  3641. * corresponding 32-bit elements of another register and add the most significant 32-bit results with
  3642. * the corresponding signed 32-bit elements of a third register. The addition results are written to the
  3643. * corresponding 32-bit elements of the third register. The `.u` form rounds up the multiplication
  3644. * results from the most significant discarded bit before the addition operations.
  3645. *
  3646. * **Description**:\n
  3647. * This instruction multiplies the signed 32-bit elements of Rs1 with the signed top 16-bit of the
  3648. * corresponding 32-bit elements of Rs2 and adds the most significant 32-bit multiplication results
  3649. * with the corresponding signed 32-bit elements of Rd. If the addition result is beyond the Q31
  3650. * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The results
  3651. * after saturation are written to the corresponding 32-bit elements of Rd. The `.u` form of the
  3652. * instruction rounds up the most significant 32-bit of the 48-bit multiplication results by adding a 1 to
  3653. * bit 15 of the result before the addition operations.
  3654. *
  3655. * **Operations**:\n
  3656. * ~~~
  3657. * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[1];
  3658. * if (`.u` form) {
  3659. * Round[x][32:0] = Mres[x][47:15] + 1;
  3660. * res[x] = Rd.W[x] + Round[x][32:1];
  3661. * } else {
  3662. * res[x] = Rd.W[x] + Mres[x][47:16];
  3663. * }
  3664. * if (res[x] > (2^31)-1) {
  3665. * res[x] = (2^31)-1;
  3666. * OV = 1;
  3667. * } else if (res[x] < -2^31) {
  3668. * res[x] = -2^31;
  3669. * OV = 1;
  3670. * }
  3671. * Rd.W[x] = res[x];
  3672. * for RV32: x=0
  3673. * for RV64: x=1...0
  3674. * ~~~
  3675. *
  3676. * \param [in] t long type of value stored in t
  3677. * \param [in] a unsigned long type of value stored in a
  3678. * \param [in] b unsigned long type of value stored in b
  3679. * \return value stored in long type
  3680. */
  3681. __STATIC_FORCEINLINE long __RV_KMMAWT(long t, unsigned long a, unsigned long b)
  3682. {
  3683. __ASM volatile("kmmawt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  3684. return t;
  3685. }
  3686. /* ===== Inline Function End for 3.46.1. KMMAWT ===== */
  3687. /* ===== Inline Function Start for 3.46.2. KMMAWT.u ===== */
  3688. /**
  3689. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
  3690. * \brief KMMAWT.u (SIMD Saturating MSW Signed Multiply Word and Top Half and Add with Rounding)
  3691. * \details
  3692. * **Type**: SIMD
  3693. *
  3694. * **Syntax**:\n
  3695. * ~~~
  3696. * KMMAWT Rd, Rs1, Rs2
  3697. * KMMAWT.u Rd, Rs1, Rs2
  3698. * ~~~
  3699. *
  3700. * **Purpose**:\n
  3701. * Multiply the signed 32-bit integer elements of one register and the signed top 16-bit of the
  3702. * corresponding 32-bit elements of another register and add the most significant 32-bit results with
  3703. * the corresponding signed 32-bit elements of a third register. The addition results are written to the
  3704. * corresponding 32-bit elements of the third register. The `.u` form rounds up the multiplication
  3705. * results from the most significant discarded bit before the addition operations.
  3706. *
  3707. * **Description**:\n
  3708. * This instruction multiplies the signed 32-bit elements of Rs1 with the signed top 16-bit of the
  3709. * corresponding 32-bit elements of Rs2 and adds the most significant 32-bit multiplication results
  3710. * with the corresponding signed 32-bit elements of Rd. If the addition result is beyond the Q31
  3711. * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The results
  3712. * after saturation are written to the corresponding 32-bit elements of Rd. The `.u` form of the
  3713. * instruction rounds up the most significant 32-bit of the 48-bit multiplication results by adding a 1 to
  3714. * bit 15 of the result before the addition operations.
  3715. *
  3716. * **Operations**:\n
  3717. * ~~~
  3718. * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[1];
  3719. * if (`.u` form) {
  3720. * Round[x][32:0] = Mres[x][47:15] + 1;
  3721. * res[x] = Rd.W[x] + Round[x][32:1];
  3722. * } else {
  3723. * res[x] = Rd.W[x] + Mres[x][47:16];
  3724. * }
  3725. * if (res[x] > (2^31)-1) {
  3726. * res[x] = (2^31)-1;
  3727. * OV = 1;
  3728. * } else if (res[x] < -2^31) {
  3729. * res[x] = -2^31;
  3730. * OV = 1;
  3731. * }
  3732. * Rd.W[x] = res[x];
  3733. * for RV32: x=0
  3734. * for RV64: x=1...0
  3735. * ~~~
  3736. *
  3737. * \param [in] t long type of value stored in t
  3738. * \param [in] a unsigned long type of value stored in a
  3739. * \param [in] b unsigned long type of value stored in b
  3740. * \return value stored in long type
  3741. */
  3742. __STATIC_FORCEINLINE long __RV_KMMAWT_U(long t, unsigned long a, unsigned long b)
  3743. {
  3744. __ASM volatile("kmmawt.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  3745. return t;
  3746. }
  3747. /* ===== Inline Function End for 3.46.2. KMMAWT.u ===== */
  3748. /* ===== Inline Function Start for 3.47.1. KMMAWT2 ===== */
  3749. /**
  3750. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
  3751. * \brief KMMAWT2 (SIMD Saturating MSW Signed Multiply Word and Top Half & 2 and Add)
  3752. * \details
  3753. * **Type**: SIMD
  3754. *
  3755. * **Syntax**:\n
  3756. * ~~~
  3757. * KMMAWT2 Rd, Rs1, Rs2
  3758. * KMMAWT2.u Rd, Rs1, Rs2
  3759. * ~~~
  3760. *
  3761. * **Purpose**:\n
  3762. * Multiply the signed 32-bit elements of one register and the top 16-bit of the
  3763. * corresponding 32-bit elements of another register, double the multiplication results and add the
  3764. * saturated most significant 32-bit results with the corresponding signed 32-bit elements of a third
  3765. * register. The saturated addition result is written to the corresponding 32-bit elements of the third
  3766. * register. The `.u` form rounds up the multiplication results from the most significant discarded bit
  3767. * before the addition operations.
  3768. *
  3769. * **Description**:\n
  3770. * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed top 16-bit Q15
  3771. * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
  3772. * adds the saturated most significant 32-bit Q31 multiplication results with the corresponding signed
  3773. * 32-bit elements of Rd. If the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is
  3774. * saturated to the range and the OV bit is set to 1. The results after saturation are written to the
  3775. * corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the most significant
  3776. * 32-bit of the 48-bit Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of
  3777. * the result before the addition operations.
  3778. *
  3779. * **Operations**:\n
  3780. * ~~~
  3781. * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[1] == 0x8000)) {
  3782. * addop.W[x] = 0x7fffffff;
  3783. * OV = 1;
  3784. * } else {
  3785. * Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[1];
  3786. * if (`.u` form) {
  3787. * Mres[x][47:14] = Mres[x][47:14] + 1;
  3788. * }
  3789. * addop.W[x] = Mres[x][46:15]; // doubling
  3790. * }
  3791. * res[x] = Rd.W[x] + addop.W[x];
  3792. * if (res[x] > (2^31)-1) {
  3793. * res[x] = (2^31)-1;
  3794. * OV = 1;
  3795. * } else if (res[x] < -2^31) {
  3796. * res[x] = -2^31;
  3797. * OV = 1;
  3798. * }
  3799. * Rd.W[x] = res[x];
  3800. * for RV32: x=0
  3801. * for RV64: x=1...0
  3802. * ~~~
  3803. *
  3804. * \param [in] t long type of value stored in t
  3805. * \param [in] a unsigned long type of value stored in a
  3806. * \param [in] b unsigned long type of value stored in b
  3807. * \return value stored in long type
  3808. */
  3809. __STATIC_FORCEINLINE long __RV_KMMAWT2(long t, unsigned long a, unsigned long b)
  3810. {
  3811. __ASM volatile("kmmawt2 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  3812. return t;
  3813. }
  3814. /* ===== Inline Function End for 3.47.1. KMMAWT2 ===== */
  3815. /* ===== Inline Function Start for 3.47.2. KMMAWT2.u ===== */
  3816. /**
  3817. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
  3818. * \brief KMMAWT2.u (SIMD Saturating MSW Signed Multiply Word and Top Half & 2 and Add with Rounding)
  3819. * \details
  3820. * **Type**: SIMD
  3821. *
  3822. * **Syntax**:\n
  3823. * ~~~
  3824. * KMMAWT2 Rd, Rs1, Rs2
  3825. * KMMAWT2.u Rd, Rs1, Rs2
  3826. * ~~~
  3827. *
  3828. * **Purpose**:\n
  3829. * Multiply the signed 32-bit elements of one register and the top 16-bit of the
  3830. * corresponding 32-bit elements of another register, double the multiplication results and add the
  3831. * saturated most significant 32-bit results with the corresponding signed 32-bit elements of a third
  3832. * register. The saturated addition result is written to the corresponding 32-bit elements of the third
  3833. * register. The `.u` form rounds up the multiplication results from the most significant discarded bit
  3834. * before the addition operations.
  3835. *
  3836. * **Description**:\n
  3837. * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed top 16-bit Q15
  3838. * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
  3839. * adds the saturated most significant 32-bit Q31 multiplication results with the corresponding signed
  3840. * 32-bit elements of Rd. If the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is
  3841. * saturated to the range and the OV bit is set to 1. The results after saturation are written to the
  3842. * corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the most significant
  3843. * 32-bit of the 48-bit Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of
  3844. * the result before the addition operations.
  3845. *
  3846. * **Operations**:\n
  3847. * ~~~
  3848. * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[1] == 0x8000)) {
  3849. * addop.W[x] = 0x7fffffff;
  3850. * OV = 1;
  3851. * } else {
  3852. * Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[1];
  3853. * if (`.u` form) {
  3854. * Mres[x][47:14] = Mres[x][47:14] + 1;
  3855. * }
  3856. * addop.W[x] = Mres[x][46:15]; // doubling
  3857. * }
  3858. * res[x] = Rd.W[x] + addop.W[x];
  3859. * if (res[x] > (2^31)-1) {
  3860. * res[x] = (2^31)-1;
  3861. * OV = 1;
  3862. * } else if (res[x] < -2^31) {
  3863. * res[x] = -2^31;
  3864. * OV = 1;
  3865. * }
  3866. * Rd.W[x] = res[x];
  3867. * for RV32: x=0
  3868. * for RV64: x=1...0
  3869. * ~~~
  3870. *
  3871. * \param [in] t long type of value stored in t
  3872. * \param [in] a unsigned long type of value stored in a
  3873. * \param [in] b unsigned long type of value stored in b
  3874. * \return value stored in long type
  3875. */
  3876. __STATIC_FORCEINLINE long __RV_KMMAWT2_U(long t, unsigned long a, unsigned long b)
  3877. {
  3878. __ASM volatile("kmmawt2.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  3879. return t;
  3880. }
  3881. /* ===== Inline Function End for 3.47.2. KMMAWT2.u ===== */
  3882. /* ===== Inline Function Start for 3.48.1. KMMSB ===== */
  3883. /**
  3884. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
  3885. * \brief KMMSB (SIMD Saturating MSW Signed Multiply Word and Subtract)
  3886. * \details
  3887. * **Type**: SIMD
  3888. *
  3889. * **Syntax**:\n
  3890. * ~~~
  3891. * KMMSB Rd, Rs1, Rs2
  3892. * KMMSB.u Rd, Rs1, Rs2
  3893. * ~~~
  3894. *
  3895. * **Purpose**:\n
  3896. * Multiply the signed 32-bit integer elements of two registers and subtract the most
  3897. * significant 32-bit results from the signed 32-bit elements of a third register. The subtraction results
  3898. * are written to the third register. The `.u` form performs an additional rounding up operation on
  3899. * the multiplication results before subtracting the most significant 32-bit part of the results.
  3900. *
  3901. * **Description**:\n
  3902. * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2
  3903. * and subtracts the most significant 32-bit multiplication results from the signed 32-bit elements of
  3904. * Rd. If the subtraction result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the
  3905. * range and the OV bit is set to 1. The results after saturation are written to Rd. The `.u` form of the
  3906. * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by
  3907. * adding a 1 to bit 31 of the results.
  3908. *
  3909. * **Operations**:\n
  3910. * ~~~
  3911. * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
  3912. * if (`.u` form) {
  3913. * Round[x][32:0] = Mres[x][63:31] + 1;
  3914. * res[x] = Rd.W[x] - Round[x][32:1];
  3915. * } else {
  3916. * res[x] = Rd.W[x] - Mres[x][63:32];
  3917. * }
  3918. * if (res[x] > (2^31)-1) {
  3919. * res[x] = (2^31)-1;
  3920. * OV = 1;
  3921. * } else if (res[x] < -2^31) {
  3922. * res[x] = -2^31;
  3923. * OV = 1;
  3924. * }
  3925. * Rd.W[x] = res[x];
  3926. * for RV32: x=0
  3927. * for RV64: x=1...0
  3928. * ~~~
  3929. *
  3930. * \param [in] t long type of value stored in t
  3931. * \param [in] a long type of value stored in a
  3932. * \param [in] b long type of value stored in b
  3933. * \return value stored in long type
  3934. */
  3935. __STATIC_FORCEINLINE long __RV_KMMSB(long t, long a, long b)
  3936. {
  3937. __ASM volatile("kmmsb %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  3938. return t;
  3939. }
  3940. /* ===== Inline Function End for 3.48.1. KMMSB ===== */
  3941. /* ===== Inline Function Start for 3.48.2. KMMSB.u ===== */
  3942. /**
  3943. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
  3944. * \brief KMMSB.u (SIMD Saturating MSW Signed Multiply Word and Subtract with Rounding)
  3945. * \details
  3946. * **Type**: SIMD
  3947. *
  3948. * **Syntax**:\n
  3949. * ~~~
  3950. * KMMSB Rd, Rs1, Rs2
  3951. * KMMSB.u Rd, Rs1, Rs2
  3952. * ~~~
  3953. *
  3954. * **Purpose**:\n
  3955. * Multiply the signed 32-bit integer elements of two registers and subtract the most
  3956. * significant 32-bit results from the signed 32-bit elements of a third register. The subtraction results
  3957. * are written to the third register. The `.u` form performs an additional rounding up operation on
  3958. * the multiplication results before subtracting the most significant 32-bit part of the results.
  3959. *
  3960. * **Description**:\n
  3961. * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2
  3962. * and subtracts the most significant 32-bit multiplication results from the signed 32-bit elements of
  3963. * Rd. If the subtraction result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the
  3964. * range and the OV bit is set to 1. The results after saturation are written to Rd. The `.u` form of the
  3965. * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by
  3966. * adding a 1 to bit 31 of the results.
  3967. *
  3968. * **Operations**:\n
  3969. * ~~~
  3970. * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
  3971. * if (`.u` form) {
  3972. * Round[x][32:0] = Mres[x][63:31] + 1;
  3973. * res[x] = Rd.W[x] - Round[x][32:1];
  3974. * } else {
  3975. * res[x] = Rd.W[x] - Mres[x][63:32];
  3976. * }
  3977. * if (res[x] > (2^31)-1) {
  3978. * res[x] = (2^31)-1;
  3979. * OV = 1;
  3980. * } else if (res[x] < -2^31) {
  3981. * res[x] = -2^31;
  3982. * OV = 1;
  3983. * }
  3984. * Rd.W[x] = res[x];
  3985. * for RV32: x=0
  3986. * for RV64: x=1...0
  3987. * ~~~
  3988. *
  3989. * \param [in] t long type of value stored in t
  3990. * \param [in] a long type of value stored in a
  3991. * \param [in] b long type of value stored in b
  3992. * \return value stored in long type
  3993. */
  3994. __STATIC_FORCEINLINE long __RV_KMMSB_U(long t, long a, long b)
  3995. {
  3996. __ASM volatile("kmmsb.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  3997. return t;
  3998. }
  3999. /* ===== Inline Function End for 3.48.2. KMMSB.u ===== */
  4000. /* ===== Inline Function Start for 3.49.1. KMMWB2 ===== */
  4001. /**
  4002. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
  4003. * \brief KMMWB2 (SIMD Saturating MSW Signed Multiply Word and Bottom Half & 2)
  4004. * \details
  4005. * **Type**: SIMD
  4006. *
  4007. * **Syntax**:\n
  4008. * ~~~
  4009. * KMMWB2 Rd, Rs1, Rs2
  4010. * KMMWB2.u Rd, Rs1, Rs2
  4011. * ~~~
  4012. *
  4013. * **Purpose**:\n
  4014. * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the
  4015. * corresponding 32-bit elements of another register, double the multiplication results and write the
  4016. * saturated most significant 32-bit results to the corresponding 32-bit elements of a register. The `.u`
  4017. * form rounds up the results from the most significant discarded bit.
  4018. *
  4019. * **Description**:\n
  4020. * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed bottom 16-bit Q15
  4021. * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
  4022. * writes the saturated most significant 32-bit Q31 multiplication results to the corresponding 32-bit
  4023. * elements of Rd. The `.u` form of the instruction rounds up the most significant 32-bit of the 48-bit
  4024. * Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of the results.
  4025. *
  4026. * **Operations**:\n
  4027. * ~~~
  4028. * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[0] == 0x8000)) {
  4029. * Rd.W[x] = 0x7fffffff;
  4030. * OV = 1;
  4031. * } else {
  4032. * Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[0];
  4033. * if (`.u` form) {
  4034. * Round[x][32:0] = Mres[x][46:14] + 1;
  4035. * Rd.W[x] = Round[x][32:1];
  4036. * } else {
  4037. * Rd.W[x] = Mres[x][46:15];
  4038. * }
  4039. * }
  4040. * for RV32: x=0
  4041. * for RV64: x=1...0
  4042. * ~~~
  4043. *
  4044. * \param [in] a long type of value stored in a
  4045. * \param [in] b unsigned long type of value stored in b
  4046. * \return value stored in long type
  4047. */
  4048. __STATIC_FORCEINLINE long __RV_KMMWB2(long a, unsigned long b)
  4049. {
  4050. long result;
  4051. __ASM volatile("kmmwb2 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  4052. return result;
  4053. }
  4054. /* ===== Inline Function End for 3.49.1. KMMWB2 ===== */
  4055. /* ===== Inline Function Start for 3.49.2. KMMWB2.u ===== */
  4056. /**
  4057. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
  4058. * \brief KMMWB2.u (SIMD Saturating MSW Signed Multiply Word and Bottom Half & 2 with Rounding)
  4059. * \details
  4060. * **Type**: SIMD
  4061. *
  4062. * **Syntax**:\n
  4063. * ~~~
  4064. * KMMWB2 Rd, Rs1, Rs2
  4065. * KMMWB2.u Rd, Rs1, Rs2
  4066. * ~~~
  4067. *
  4068. * **Purpose**:\n
  4069. * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the
  4070. * corresponding 32-bit elements of another register, double the multiplication results and write the
  4071. * saturated most significant 32-bit results to the corresponding 32-bit elements of a register. The `.u`
  4072. * form rounds up the results from the most significant discarded bit.
  4073. *
  4074. * **Description**:\n
  4075. * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed bottom 16-bit Q15
  4076. * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
  4077. * writes the saturated most significant 32-bit Q31 multiplication results to the corresponding 32-bit
  4078. * elements of Rd. The `.u` form of the instruction rounds up the most significant 32-bit of the 48-bit
  4079. * Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of the results.
  4080. *
  4081. * **Operations**:\n
  4082. * ~~~
  4083. * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[0] == 0x8000)) {
  4084. * Rd.W[x] = 0x7fffffff;
  4085. * OV = 1;
  4086. * } else {
  4087. * Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[0];
  4088. * if (`.u` form) {
  4089. * Round[x][32:0] = Mres[x][46:14] + 1;
  4090. * Rd.W[x] = Round[x][32:1];
  4091. * } else {
  4092. * Rd.W[x] = Mres[x][46:15];
  4093. * }
  4094. * }
  4095. * for RV32: x=0
  4096. * for RV64: x=1...0
  4097. * ~~~
  4098. *
  4099. * \param [in] a long type of value stored in a
  4100. * \param [in] b unsigned long type of value stored in b
  4101. * \return value stored in long type
  4102. */
  4103. __STATIC_FORCEINLINE long __RV_KMMWB2_U(long a, unsigned long b)
  4104. {
  4105. long result;
  4106. __ASM volatile("kmmwb2.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  4107. return result;
  4108. }
  4109. /* ===== Inline Function End for 3.49.2. KMMWB2.u ===== */
  4110. /* ===== Inline Function Start for 3.50.1. KMMWT2 ===== */
  4111. /**
  4112. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
  4113. * \brief KMMWT2 (SIMD Saturating MSW Signed Multiply Word and Top Half & 2)
  4114. * \details
  4115. * **Type**: SIMD
  4116. *
  4117. * **Syntax**:\n
  4118. * ~~~
  4119. * KMMWT2 Rd, Rs1, Rs2
  4120. * KMMWT2.u Rd, Rs1, Rs2
  4121. * ~~~
  4122. *
  4123. * **Purpose**:\n
  4124. * Multiply the signed 32-bit integer elements of one register and the top 16-bit of the
  4125. * corresponding 32-bit elements of another register, double the multiplication results and write the
  4126. * saturated most significant 32-bit results to the corresponding 32-bit elements of a register. The `.u`
  4127. * form rounds up the results from the most significant discarded bit.
  4128. *
  4129. * **Description**:\n
  4130. * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed top 16-bit Q15
  4131. * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
  4132. * writes the saturated most significant 32-bit Q31 multiplication results to the corresponding 32-bit
  4133. * elements of Rd. The `.u` form of the instruction rounds up the most significant 32-bit of the 48-bit
  4134. * Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of the results.
  4135. *
  4136. * **Operations**:\n
  4137. * ~~~
  4138. * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[1] == 0x8000)) {
  4139. * Rd.W[x] = 0x7fffffff;
  4140. * OV = 1;
  4141. * } else {
  4142. * Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[1];
  4143. * if (`.u` form) {
  4144. * Round[x][32:0] = Mres[x][46:14] + 1;
  4145. * Rd.W[x] = Round[x][32:1];
  4146. * } else {
  4147. * Rd.W[x] = Mres[x][46:15];
  4148. * }
  4149. * }
  4150. * for RV32: x=0
  4151. * for RV64: x=1...0
  4152. * ~~~
  4153. *
  4154. * \param [in] a long type of value stored in a
  4155. * \param [in] b unsigned long type of value stored in b
  4156. * \return value stored in long type
  4157. */
  4158. __STATIC_FORCEINLINE long __RV_KMMWT2(long a, unsigned long b)
  4159. {
  4160. long result;
  4161. __ASM volatile("kmmwt2 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  4162. return result;
  4163. }
  4164. /* ===== Inline Function End for 3.50.1. KMMWT2 ===== */
  4165. /* ===== Inline Function Start for 3.50.2. KMMWT2.u ===== */
  4166. /**
  4167. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
  4168. * \brief KMMWT2.u (SIMD Saturating MSW Signed Multiply Word and Top Half & 2 with Rounding)
  4169. * \details
  4170. * **Type**: SIMD
  4171. *
  4172. * **Syntax**:\n
  4173. * ~~~
  4174. * KMMWT2 Rd, Rs1, Rs2
  4175. * KMMWT2.u Rd, Rs1, Rs2
  4176. * ~~~
  4177. *
  4178. * **Purpose**:\n
  4179. * Multiply the signed 32-bit integer elements of one register and the top 16-bit of the
  4180. * corresponding 32-bit elements of another register, double the multiplication results and write the
  4181. * saturated most significant 32-bit results to the corresponding 32-bit elements of a register. The `.u`
  4182. * form rounds up the results from the most significant discarded bit.
  4183. *
  4184. * **Description**:\n
  4185. * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed top 16-bit Q15
  4186. * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
  4187. * writes the saturated most significant 32-bit Q31 multiplication results to the corresponding 32-bit
  4188. * elements of Rd. The `.u` form of the instruction rounds up the most significant 32-bit of the 48-bit
  4189. * Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of the results.
  4190. *
  4191. * **Operations**:\n
  4192. * ~~~
  4193. * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[1] == 0x8000)) {
  4194. * Rd.W[x] = 0x7fffffff;
  4195. * OV = 1;
  4196. * } else {
  4197. * Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[1];
  4198. * if (`.u` form) {
  4199. * Round[x][32:0] = Mres[x][46:14] + 1;
  4200. * Rd.W[x] = Round[x][32:1];
  4201. * } else {
  4202. * Rd.W[x] = Mres[x][46:15];
  4203. * }
  4204. * }
  4205. * for RV32: x=0
  4206. * for RV64: x=1...0
  4207. * ~~~
  4208. *
  4209. * \param [in] a long type of value stored in a
  4210. * \param [in] b unsigned long type of value stored in b
  4211. * \return value stored in long type
  4212. */
  4213. __STATIC_FORCEINLINE long __RV_KMMWT2_U(long a, unsigned long b)
  4214. {
  4215. long result;
  4216. __ASM volatile("kmmwt2.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  4217. return result;
  4218. }
  4219. /* ===== Inline Function End for 3.50.2. KMMWT2.u ===== */
  4220. /* ===== Inline Function Start for 3.51.1. KMSDA ===== */
  4221. /**
  4222. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
  4223. * \brief KMSDA (SIMD Saturating Signed Multiply Two Halfs & Add & Subtract)
  4224. * \details
  4225. * **Type**: SIMD
  4226. *
  4227. * **Syntax**:\n
  4228. * ~~~
  4229. * KMSDA Rd, Rs1, Rs2
  4230. * KMSXDA Rd, Rs1, Rs2
  4231. * ~~~
  4232. *
  4233. * **Purpose**:\n
  4234. * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
  4235. * subtracts the two 32-bit results from the corresponding 32-bit elements of a third register. The
  4236. * subtraction result may be saturated.
  4237. * * KMSDA: rd.W[x] - top*top - bottom*bottom (per 32-bit element)
  4238. * * KMSXDA: rd.W[x] - top*bottom - bottom*top (per 32-bit element)
  4239. *
  4240. * **Description**:\n
  4241. * For the `KMSDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  4242. * with the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of
  4243. * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2.
  4244. * For the `KMSXDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  4245. * with the top 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of the
  4246. * 32-bit elements of Rs1 with the bottom 16-bit content of the 32-bit elements of Rs2.
  4247. * The two 32-bit multiplication results are then subtracted from the content of the corresponding 32-
  4248. * bit elements of Rd. If the subtraction result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is
  4249. * saturated to the range and the OV bit is set to 1. The results after saturation are written to Rd. The
  4250. * 16-bit contents are treated as signed integers.
  4251. *
  4252. * **Operations**:\n
  4253. * ~~~
  4254. * // KMSDA
  4255. * res[x] = Rd.W[x] - (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
  4256. * // KMSXDA
  4257. * res[x] = Rd.W[x] - (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
  4258. * if (res[x] > (2^31)-1) {
  4259. * res[x] = (2^31)-1;
  4260. * OV = 1;
  4261. * } else if (res[x] < -2^31) {
  4262. * res[x] = -2^31;
  4263. * OV = 1;
  4264. * }
  4265. * Rd.W[x] = res[x];
  4266. * for RV32: x=0
  4267. * for RV64: x=1...0
  4268. * ~~~
  4269. *
  4270. * \param [in] t long type of value stored in t
  4271. * \param [in] a unsigned long type of value stored in a
  4272. * \param [in] b unsigned long type of value stored in b
  4273. * \return value stored in long type
  4274. */
  4275. __STATIC_FORCEINLINE long __RV_KMSDA(long t, unsigned long a, unsigned long b)
  4276. {
  4277. __ASM volatile("kmsda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  4278. return t;
  4279. }
  4280. /* ===== Inline Function End for 3.51.1. KMSDA ===== */
  4281. /* ===== Inline Function Start for 3.51.2. KMSXDA ===== */
  4282. /**
  4283. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
  4284. * \brief KMSXDA (SIMD Saturating Signed Crossed Multiply Two Halfs & Add & Subtract)
  4285. * \details
  4286. * **Type**: SIMD
  4287. *
  4288. * **Syntax**:\n
  4289. * ~~~
  4290. * KMSDA Rd, Rs1, Rs2
  4291. * KMSXDA Rd, Rs1, Rs2
  4292. * ~~~
  4293. *
  4294. * **Purpose**:\n
  4295. * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
  4296. * subtracts the two 32-bit results from the corresponding 32-bit elements of a third register. The
  4297. * subtraction result may be saturated.
  4298. * * KMSDA: rd.W[x] - top*top - bottom*bottom (per 32-bit element)
  4299. * * KMSXDA: rd.W[x] - top*bottom - bottom*top (per 32-bit element)
  4300. *
  4301. * **Description**:\n
  4302. * For the `KMSDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  4303. * with the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of
  4304. * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2.
  4305. * For the `KMSXDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  4306. * with the top 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of the
  4307. * 32-bit elements of Rs1 with the bottom 16-bit content of the 32-bit elements of Rs2.
  4308. * The two 32-bit multiplication results are then subtracted from the content of the corresponding 32-
  4309. * bit elements of Rd. If the subtraction result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is
  4310. * saturated to the range and the OV bit is set to 1. The results after saturation are written to Rd. The
  4311. * 16-bit contents are treated as signed integers.
  4312. *
  4313. * **Operations**:\n
  4314. * ~~~
  4315. * // KMSDA
  4316. * res[x] = Rd.W[x] - (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
  4317. * // KMSXDA
  4318. * res[x] = Rd.W[x] - (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
  4319. * if (res[x] > (2^31)-1) {
  4320. * res[x] = (2^31)-1;
  4321. * OV = 1;
  4322. * } else if (res[x] < -2^31) {
  4323. * res[x] = -2^31;
  4324. * OV = 1;
  4325. * }
  4326. * Rd.W[x] = res[x];
  4327. * for RV32: x=0
  4328. * for RV64: x=1...0
  4329. * ~~~
  4330. *
  4331. * \param [in] t long type of value stored in t
  4332. * \param [in] a unsigned long type of value stored in a
  4333. * \param [in] b unsigned long type of value stored in b
  4334. * \return value stored in long type
  4335. */
  4336. __STATIC_FORCEINLINE long __RV_KMSXDA(long t, unsigned long a, unsigned long b)
  4337. {
  4338. __ASM volatile("kmsxda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  4339. return t;
  4340. }
  4341. /* ===== Inline Function End for 3.51.2. KMSXDA ===== */
  4342. /* ===== Inline Function Start for 3.52. KMSR64 ===== */
  4343. /**
  4344. * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
  4345. * \brief KMSR64 (Signed Multiply and Saturating Subtract from 64-Bit Data)
  4346. * \details
  4347. * **Type**: DSP (64-bit Profile)
  4348. *
  4349. * **Syntax**:\n
  4350. * ~~~
  4351. * KMSR64 Rd, Rs1, Rs2
  4352. * ~~~
  4353. *
  4354. * **Purpose**:\n
  4355. * Multiply the 32-bit signed elements in two registers and subtract the 64-bit multiplication
  4356. * results from the 64-bit signed data of a pair of registers (RV32) or a register (RV64). The result is
  4357. * saturated to the Q63 range and written back to the pair of registers (RV32) or the register (RV64).
  4358. *
  4359. * **RV32 Description**:\n
  4360. * This instruction multiplies the 32-bit signed data of Rs1 with that of Rs2. It
  4361. * subtracts the 64-bit multiplication result from the 64-bit signed data of an even/odd pair of registers
  4362. * specified by Rd(4,1) with unlimited precision. If the 64-bit subtraction result is beyond the Q63
  4363. * number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The saturated
  4364. * result is written back to the even/odd pair of registers specified by Rd(4,1).
  4365. * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
  4366. * includes register 2d and 2d+1.
  4367. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
  4368. * of the pair contains the low 32-bit of the result.
  4369. *
  4370. * **RV64 Description**:\n
  4371. * This instruction multiplies the 32-bit signed elements of Rs1 with that of Rs2. It
  4372. * subtracts the 64-bit multiplication results from the 64-bit signed data in Rd with unlimited
  4373. * precision. If the 64-bit subtraction result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is
  4374. * saturated to the range and the OV bit is set to 1. The saturated result is written back to Rd.
  4375. *
  4376. * **Operations**:\n
  4377. * ~~~
  4378. * RV32:
  4379. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  4380. * result = R[t_H].R[t_L] - (Rs1 * Rs2);
  4381. * if (result > (2^63)-1) {
  4382. * result = (2^63)-1; OV = 1;
  4383. * } else if (result < -2^63) {
  4384. * result = -2^63; OV = 1;
  4385. * }
  4386. * R[t_H].R[t_L] = result;
  4387. * RV64:
  4388. * // `result` has unlimited precision
  4389. * result = Rd - (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]);
  4390. * if (result > (2^63)-1) {
  4391. * result = (2^63)-1; OV = 1;
  4392. * } else if (result < -2^63) {
  4393. * result = -2^63; OV = 1;
  4394. * }
  4395. * Rd = result;
  4396. * ~~~
  4397. *
  4398. * \param [in] t long long type of value stored in t
  4399. * \param [in] a long type of value stored in a
  4400. * \param [in] b long type of value stored in b
  4401. * \return value stored in long long type
  4402. */
  4403. __STATIC_FORCEINLINE long long __RV_KMSR64(long long t, long a, long b)
  4404. {
  4405. __ASM volatile("kmsr64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  4406. return t;
  4407. }
  4408. /* ===== Inline Function End for 3.52. KMSR64 ===== */
  4409. /* ===== Inline Function Start for 3.53. KSLLW ===== */
  4410. /**
  4411. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
  4412. * \brief KSLLW (Saturating Shift Left Logical for Word)
  4413. * \details
  4414. * **Type**: DSP
  4415. *
  4416. * **Syntax**:\n
  4417. * ~~~
  4418. * KSLLW Rd, Rs1, Rs2
  4419. * ~~~
  4420. *
  4421. * **Purpose**:\n
  4422. * Do logical left shift operation with saturation on a 32-bit word. The shift amount is a
  4423. * variable from a GPR.
  4424. *
  4425. * **Description**:\n
  4426. * The first word data in Rs1 is left-shifted logically. The shifted out bits are filled with
  4427. * zero and the shift amount is specified by the low-order 5-bits of the value in the Rs2 register. Any
  4428. * shifted value greater than 2^31-1 is saturated to 2^31-1. Any shifted value smaller than -2^31 is saturated
  4429. * to -2^31. And the saturated result is sign-extended and written to Rd. If any saturation is performed,
  4430. * set OV bit to 1.
  4431. *
  4432. * **Operations**:\n
  4433. * ~~~
  4434. * sa = Rs2[4:0];
  4435. * res[(31+sa):0] = Rs1.W[0] << sa;
  4436. * if (res > (2^31)-1) {
  4437. * res = 0x7fffffff; OV = 1;
  4438. * } else if (res < -2^31) {
  4439. * res = 0x80000000; OV = 1;
  4440. * }
  4441. * Rd[31:0] = res[31:0]; // RV32
  4442. * Rd[63:0] = SE(res[31:0]); // RV64
  4443. * ~~~
  4444. *
  4445. * \param [in] a long type of value stored in a
  4446. * \param [in] b unsigned int type of value stored in b
  4447. * \return value stored in long type
  4448. */
  4449. __STATIC_FORCEINLINE long __RV_KSLLW(long a, unsigned int b)
  4450. {
  4451. long result;
  4452. __ASM volatile("ksllw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  4453. return result;
  4454. }
  4455. /* ===== Inline Function End for 3.53. KSLLW ===== */
  4456. /* ===== Inline Function Start for 3.54. KSLLIW ===== */
  4457. /**
  4458. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
  4459. * \brief KSLLIW (Saturating Shift Left Logical Immediate for Word)
  4460. * \details
  4461. * **Type**: DSP
  4462. *
  4463. * **Syntax**:\n
  4464. * ~~~
  4465. * KSLLIW Rd, Rs1, imm5u
  4466. * ~~~
  4467. *
  4468. * **Purpose**:\n
  4469. * Do logical left shift operation with saturation on a 32-bit word. The shift amount is an
  4470. * immediate value.
  4471. *
  4472. * **Description**:\n
  4473. * The first word data in Rs1 is left-shifted logically. The shifted out bits are filled with
  4474. * zero and the shift amount is specified by the imm5u constant. Any shifted value greater than 2^31-1 is
  4475. * saturated to 2^31-1. Any shifted value smaller than -2^31 is saturated to -2^31. And the saturated result is
  4476. * sign-extended and written to Rd. If any saturation is performed, set OV bit to 1.
  4477. *
  4478. * **Operations**:\n
  4479. * ~~~
  4480. * sa = imm5u;
  4481. * res[(31+sa):0] = Rs1.W[0] << sa;
  4482. * if (res > (2^31)-1) {
  4483. * res = 0x7fffffff; OV = 1;
  4484. * } else if (res < -2^31) {
  4485. * res = 0x80000000; OV = 1;
  4486. * }
  4487. * Rd[31:0] = res[31:0]; // RV32
  4488. * Rd[63:0] = SE(res[31:0]); // RV64
  4489. * ~~~
  4490. *
  4491. * \param [in] a long type of value stored in a
  4492. * \param [in] b shift amount; must be a compile-time constant that fits the imm5u field (0..31)
  4493. * \return value stored in long type
  4494. */
  #define __RV_KSLLIW(a, b) \
      ({ \
          /* Evaluate `a` before any other macro-local is in scope, and keep   */ \
          /* the locals in the reserved `__` namespace, so that an argument     */ \
          /* expression mentioning a caller variable called `result` is not     */ \
          /* silently captured by an uninitialized local of this macro.         */ \
          long __a = (long)(a); \
          long __result; \
          /* `b` is bound with the "K" constraint, so it must be an integer     */ \
          /* constant expression; it is encoded as the imm5u shift amount.      */ \
          __ASM volatile("kslliw %0, %1, %2" : "=r"(__result) : "r"(__a), "K"(b)); \
          __result; \
      })
  4502. /* ===== Inline Function End for 3.54. KSLLIW ===== */
  4503. /* ===== Inline Function Start for 3.55. KSLL8 ===== */
  4504. /**
  4505. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
  4506. * \brief KSLL8 (SIMD 8-bit Saturating Shift Left Logical)
  4507. * \details
  4508. * **Type**: SIMD
  4509. *
  4510. * **Syntax**:\n
  4511. * ~~~
  4512. * KSLL8 Rd, Rs1, Rs2
  4513. * ~~~
  4514. *
  4515. * **Purpose**:\n
  4516. * Do 8-bit elements logical left shift operations with saturation simultaneously. The shift
  4517. * amount is a variable from a GPR.
  4518. *
  4519. * **Description**:\n
  4520. * The 8-bit data elements in Rs1 are left-shifted logically. The shifted out bits are filled
  4521. * with zero and the shift amount is specified by the low-order 3-bits of the value in the Rs2 register.
  4522. * Any shifted value greater than 2^7-1 is saturated to 2^7-1. Any shifted value smaller than -2^7 is
  4523. * saturated to -2^7. And the saturated results are written to Rd. If any saturation is performed, set OV
  4524. * bit to 1.
  4525. *
  4526. * **Operations**:\n
  4527. * ~~~
  4528. * sa = Rs2[2:0];
  4529. * if (sa != 0) {
  4530. * res[(7+sa):0] = Rs1.B[x] << sa;
  4531. * if (res > (2^7)-1) {
  4532. * res = 0x7f; OV = 1;
  4533. * } else if (res < -2^7) {
  4534. * res = 0x80; OV = 1;
  4535. * }
  4536. * Rd.B[x] = res[7:0];
  4537. * } else {
  4538. * Rd = Rs1;
  4539. * }
  4540. * for RV32: x=3...0,
  4541. * for RV64: x=7...0
  4542. * ~~~
  4543. *
  4544. * \param [in] a unsigned long type of value stored in a
  4545. * \param [in] b unsigned int type of value stored in b
  4546. * \return value stored in unsigned long type
  4547. */
  4548. __STATIC_FORCEINLINE unsigned long __RV_KSLL8(unsigned long a, unsigned int b)
  4549. {
  4550. unsigned long result;
  4551. __ASM volatile("ksll8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  4552. return result;
  4553. }
  4554. /* ===== Inline Function End for 3.55. KSLL8 ===== */
  4555. /* ===== Inline Function Start for 3.56. KSLLI8 ===== */
  4556. /**
  4557. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
  4558. * \brief KSLLI8 (SIMD 8-bit Saturating Shift Left Logical Immediate)
  4559. * \details
  4560. * **Type**: SIMD
  4561. *
  4562. * **Syntax**:\n
  4563. * ~~~
  4564. * KSLLI8 Rd, Rs1, imm3u
  4565. * ~~~
  4566. *
  4567. * **Purpose**:\n
  4568. * Do 8-bit elements logical left shift operations with saturation simultaneously. The shift
  4569. * amount is an immediate value.
  4570. *
  4571. * **Description**:\n
  4572. * The 8-bit data elements in Rs1 are left-shifted logically. The shifted out bits are filled
  4573. * with zero and the shift amount is specified by the imm3u constant. Any shifted value greater than
  4574. * 2^7-1 is saturated to 2^7-1. Any shifted value smaller than -2^7 is saturated to -2^7. And the saturated
  4575. * results are written to Rd. If any saturation is performed, set OV bit to 1.
  4576. *
  4577. * **Operations**:\n
  4578. * ~~~
  4579. * sa = imm3u[2:0];
  4580. * if (sa != 0) {
  4581. * res[(7+sa):0] = Rs1.B[x] << sa;
  4582. * if (res > (2^7)-1) {
  4583. * res = 0x7f; OV = 1;
  4584. * } else if (res < -2^7) {
  4585. * res = 0x80; OV = 1;
  4586. * }
  4587. * Rd.B[x] = res[7:0];
  4588. * } else {
  4589. * Rd = Rs1;
  4590. * }
  4591. * for RV32: x=3...0,
  4592. * for RV64: x=7...0
  4593. * ~~~
  4594. *
  4595. * \param [in] a unsigned long type of value stored in a
  4596. * \param [in] b shift amount; must be a compile-time constant that fits the imm3u field (0..7)
  4597. * \return value stored in unsigned long type
  4598. */
  #define __RV_KSLLI8(a, b) \
      ({ \
          /* Evaluate `a` before any other macro-local is in scope, and keep   */ \
          /* the locals in the reserved `__` namespace, so that an argument     */ \
          /* expression mentioning a caller variable called `result` is not     */ \
          /* silently captured by an uninitialized local of this macro.         */ \
          unsigned long __a = (unsigned long)(a); \
          unsigned long __result; \
          /* `b` is bound with the "K" constraint, so it must be an integer     */ \
          /* constant expression; the instruction encodes it as imm3u.          */ \
          __ASM volatile("kslli8 %0, %1, %2" : "=r"(__result) : "r"(__a), "K"(b)); \
          __result; \
      })
  4606. /* ===== Inline Function End for 3.56. KSLLI8 ===== */
  4607. /* ===== Inline Function Start for 3.57. KSLL16 ===== */
  4608. /**
  4609. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
  4610. * \brief KSLL16 (SIMD 16-bit Saturating Shift Left Logical)
  4611. * \details
  4612. * **Type**: SIMD
  4613. *
  4614. * **Syntax**:\n
  4615. * ~~~
  4616. * KSLL16 Rd, Rs1, Rs2
  4617. * ~~~
  4618. *
  4619. * **Purpose**:\n
  4620. * Do 16-bit elements logical left shift operations with saturation simultaneously. The shift
  4621. * amount is a variable from a GPR.
  4622. *
  4623. * **Description**:\n
  4624. * The 16-bit data elements in Rs1 are left-shifted logically. The shifted out bits are filled
  4625. * with zero and the shift amount is specified by the low-order 4-bits of the value in the Rs2 register.
  4626. * Any shifted value greater than 2^15-1 is saturated to 2^15-1. Any shifted value smaller than -2^15 is
  4627. * saturated to -2^15. And the saturated results are written to Rd. If any saturation is performed, set OV
  4628. * bit to 1.
  4629. *
  4630. * **Operations**:\n
  4631. * ~~~
  4632. * sa = Rs2[3:0];
  4633. * if (sa != 0) {
  4634. * res[(15+sa):0] = Rs1.H[x] << sa;
  4635. * if (res > (2^15)-1) {
  4636. * res = 0x7fff; OV = 1;
  4637. * } else if (res < -2^15) {
  4638. * res = 0x8000; OV = 1;
  4639. * }
  4640. * Rd.H[x] = res[15:0];
  4641. * } else {
  4642. * Rd = Rs1;
  4643. * }
  4644. * for RV32: x=1...0,
  4645. * for RV64: x=3...0
  4646. * ~~~
  4647. *
  4648. * \param [in] a unsigned long type of value stored in a
  4649. * \param [in] b unsigned int type of value stored in b
  4650. * \return value stored in unsigned long type
  4651. */
  4652. __STATIC_FORCEINLINE unsigned long __RV_KSLL16(unsigned long a, unsigned int b)
  4653. {
  4654. unsigned long result;
  4655. __ASM volatile("ksll16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  4656. return result;
  4657. }
  4658. /* ===== Inline Function End for 3.57. KSLL16 ===== */
  4659. /* ===== Inline Function Start for 3.58. KSLLI16 ===== */
  4660. /**
  4661. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
  4662. * \brief KSLLI16 (SIMD 16-bit Saturating Shift Left Logical Immediate)
  4663. * \details
  4664. * **Type**: SIMD
  4665. *
  4666. * **Syntax**:\n
  4667. * ~~~
  4668. * KSLLI16 Rd, Rs1, imm4u
  4669. * ~~~
  4670. *
  4671. * **Purpose**:\n
  4672. * Do 16-bit elements logical left shift operations with saturation simultaneously. The shift
  4673. * amount is an immediate value.
  4674. *
  4675. * **Description**:\n
  4676. * The 16-bit data elements in Rs1 are left-shifted logically. The shifted out bits are filled
  4677. * with zero and the shift amount is specified by the imm4u constant. Any shifted value greater than
  4678. * 2^15-1 is saturated to 2^15-1. Any shifted value smaller than -2^15 is saturated to -2^15. And the saturated
  4679. * results are written to Rd. If any saturation is performed, set OV bit to 1.
  4680. *
  4681. * **Operations**:\n
  4682. * ~~~
  4683. * sa = imm4u[3:0];
  4684. * if (sa != 0) {
  4685. * res[(15+sa):0] = Rs1.H[x] << sa;
  4686. * if (res > (2^15)-1) {
  4687. * res = 0x7fff; OV = 1;
  4688. * } else if (res < -2^15) {
  4689. * res = 0x8000; OV = 1;
  4690. * }
  4691. * Rd.H[x] = res[15:0];
  4692. * } else {
  4693. * Rd = Rs1;
  4694. * }
  4695. * for RV32: x=1...0,
  4696. * for RV64: x=3...0
  4697. * ~~~
  4698. *
  4699. * \param [in] a unsigned long type of value stored in a
  4700. * \param [in] b shift amount; must be a compile-time constant that fits the imm4u field (0..15)
  4701. * \return value stored in unsigned long type
  4702. */
  #define __RV_KSLLI16(a, b) \
      ({ \
          /* Evaluate `a` before any other macro-local is in scope, and keep   */ \
          /* the locals in the reserved `__` namespace, so that an argument     */ \
          /* expression mentioning a caller variable called `result` is not     */ \
          /* silently captured by an uninitialized local of this macro.         */ \
          unsigned long __a = (unsigned long)(a); \
          unsigned long __result; \
          /* `b` is bound with the "K" constraint, so it must be an integer     */ \
          /* constant expression; the instruction encodes it as imm4u.          */ \
          __ASM volatile("kslli16 %0, %1, %2" : "=r"(__result) : "r"(__a), "K"(b)); \
          __result; \
      })
  4710. /* ===== Inline Function End for 3.58. KSLLI16 ===== */
  4711. /* ===== Inline Function Start for 3.59.1. KSLRA8 ===== */
  4712. /**
  4713. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
  4714. * \brief KSLRA8 (SIMD 8-bit Shift Left Logical with Saturation or Shift Right Arithmetic)
  4715. * \details
  4716. * **Type**: SIMD
  4717. *
  4718. * **Syntax**:\n
  4719. * ~~~
  4720. * KSLRA8 Rd, Rs1, Rs2
  4721. * KSLRA8.u Rd, Rs1, Rs2
  4722. * ~~~
  4723. *
  4724. * **Purpose**:\n
  4725. * Do 8-bit elements logical left (positive) or arithmetic right (negative) shift operation with
  4726. * Q7 saturation for the left shift. The `.u` form performs additional rounding up operations for the
  4727. * right shift.
  4728. *
  4729. * **Description**:\n
  4730. * The 8-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
  4731. * based on the value of Rs2[3:0]. Rs2[3:0] is in the signed range of [-2^3, 2^3-1]. A positive Rs2[3:0] means
  4732. * logical left shift and a negative Rs2[3:0] means arithmetic right shift. The shift amount is the
  4733. * absolute value of Rs2[3:0]. However, the behavior of `Rs2[3:0]==-2^3 (0x8)` is defined to be
  4734. * equivalent to the behavior of `Rs2[3:0]==-(2^3-1) (0x9)`.
  4735. * The left-shifted results are saturated to the 8-bit signed integer range of [-2^7, 2^7-1]. For the `.u` form
  4736. * of the instruction, the right-shifted results are added a 1 to the most significant discarded bit
  4737. * position for rounding effect. After the shift, saturation, or rounding, the final results are written to
  4738. * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:4] will not affect
  4739. * this instruction.
  4740. *
  4741. * **Operations**:\n
  4742. * ~~~
  4743. * if (Rs2[3:0] < 0) {
  4744. * sa = -Rs2[3:0];
  4745. * sa = (sa == 8)? 7 : sa;
  4746. * if (`.u` form) {
  4747. * res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1;
  4748. * Rd.B[x] = res[7:0];
  4749. * } else {
  4750. * Rd.B[x] = SE8(Rs1.B[x][7:sa]);
  4751. * }
  4752. * } else {
  4753. * sa = Rs2[2:0];
  4754. * res[(7+sa):0] = Rs1.B[x] <<(logic) sa;
  4755. * if (res > (2^7)-1) {
  4756. * res[7:0] = 0x7f; OV = 1;
  4757. * } else if (res < -2^7) {
  4758. * res[7:0] = 0x80; OV = 1;
  4759. * }
  4760. * Rd.B[x] = res[7:0];
  4761. * }
  4762. * for RV32: x=3...0,
  4763. * for RV64: x=7...0
  4764. * ~~~
  4765. *
  4766. * \param [in] a unsigned long type of value stored in a
  4767. * \param [in] b int type of value stored in b
  4768. * \return value stored in unsigned long type
  4769. */
  4770. __STATIC_FORCEINLINE unsigned long __RV_KSLRA8(unsigned long a, int b)
  4771. {
  4772. unsigned long result;
  4773. __ASM volatile("kslra8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  4774. return result;
  4775. }
  4776. /* ===== Inline Function End for 3.59.1. KSLRA8 ===== */
  4777. /* ===== Inline Function Start for 3.59.2. KSLRA8.u ===== */
  4778. /**
  4779. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
  4780. * \brief KSLRA8.u (SIMD 8-bit Shift Left Logical with Saturation or Rounding Shift Right Arithmetic)
  4781. * \details
  4782. * **Type**: SIMD
  4783. *
  4784. * **Syntax**:\n
  4785. * ~~~
  4786. * KSLRA8 Rd, Rs1, Rs2
  4787. * KSLRA8.u Rd, Rs1, Rs2
  4788. * ~~~
  4789. *
  4790. * **Purpose**:\n
  4791. * Do 8-bit elements logical left (positive) or arithmetic right (negative) shift operation with
  4792. * Q7 saturation for the left shift. The `.u` form performs additional rounding up operations for the
  4793. * right shift.
  4794. *
  4795. * **Description**:\n
  4796. * The 8-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
  4797. * based on the value of Rs2[3:0]. Rs2[3:0] is in the signed range of [-2^3, 2^3-1]. A positive Rs2[3:0] means
  4798. * logical left shift and a negative Rs2[3:0] means arithmetic right shift. The shift amount is the
  4799. * absolute value of Rs2[3:0]. However, the behavior of `Rs2[3:0]==-2^3 (0x8)` is defined to be
  4800. * equivalent to the behavior of `Rs2[3:0]==-(2^3-1) (0x9)`.
  4801. * The left-shifted results are saturated to the 8-bit signed integer range of [-2^7, 2^7-1]. For the `.u` form
  4802. * of the instruction, the right-shifted results are added a 1 to the most significant discarded bit
  4803. * position for rounding effect. After the shift, saturation, or rounding, the final results are written to
  4804. * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:4] will not affect
  4805. * this instruction.
  4806. *
  4807. * **Operations**:\n
  4808. * ~~~
  4809. * if (Rs2[3:0] < 0) {
  4810. * sa = -Rs2[3:0];
  4811. * sa = (sa == 8)? 7 : sa;
  4812. * if (`.u` form) {
  4813. * res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1;
  4814. * Rd.B[x] = res[7:0];
  4815. * } else {
  4816. * Rd.B[x] = SE8(Rs1.B[x][7:sa]);
  4817. * }
  4818. * } else {
  4819. * sa = Rs2[2:0];
  4820. * res[(7+sa):0] = Rs1.B[x] <<(logic) sa;
  4821. * if (res > (2^7)-1) {
  4822. * res[7:0] = 0x7f; OV = 1;
  4823. * } else if (res < -2^7) {
  4824. * res[7:0] = 0x80; OV = 1;
  4825. * }
  4826. * Rd.B[x] = res[7:0];
  4827. * }
  4828. * for RV32: x=3...0,
  4829. * for RV64: x=7...0
  4830. * ~~~
  4831. *
  4832. * \param [in] a unsigned long type of value stored in a
  4833. * \param [in] b int type of value stored in b
  4834. * \return value stored in unsigned long type
  4835. */
  4836. __STATIC_FORCEINLINE unsigned long __RV_KSLRA8_U(unsigned long a, int b)
  4837. {
  4838. unsigned long result;
  4839. __ASM volatile("kslra8.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  4840. return result;
  4841. }
  4842. /* ===== Inline Function End for 3.59.2. KSLRA8.u ===== */
  4843. /* ===== Inline Function Start for 3.60.1. KSLRA16 ===== */
  4844. /**
  4845. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
  4846. * \brief KSLRA16 (SIMD 16-bit Shift Left Logical with Saturation or Shift Right Arithmetic)
  4847. * \details
  4848. * **Type**: SIMD
  4849. *
  4850. * **Syntax**:\n
  4851. * ~~~
  4852. * KSLRA16 Rd, Rs1, Rs2
  4853. * KSLRA16.u Rd, Rs1, Rs2
  4854. * ~~~
  4855. *
  4856. * **Purpose**:\n
  4857. * Do 16-bit elements logical left (positive) or arithmetic right (negative) shift operation with
  4858. * Q15 saturation for the left shift. The `.u` form performs additional rounding up operations for the
  4859. * right shift.
  4860. *
  4861. * **Description**:\n
  4862. * The 16-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
  4863. * based on the value of Rs2[4:0]. Rs2[4:0] is in the signed range of [-2^4, 2^4-1]. A positive Rs2[4:0] means
  4864. * logical left shift and a negative Rs2[4:0] means arithmetic right shift. The shift amount is the
  4865. * absolute value of Rs2[4:0]. However, the behavior of `Rs2[4:0]==-2^4 (0x10)` is defined to be
  4866. * equivalent to the behavior of `Rs2[4:0]==-(2^4-1) (0x11)`.
  4867. * The left-shifted results are saturated to the 16-bit signed integer range of [-2^15, 2^15-1]. For the `.u`
  4868. * form of the instruction, the right-shifted results are added a 1 to the most significant discarded bit
  4869. * position for rounding effect. After the shift, saturation, or rounding, the final results are written to
  4870. * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:5] will not affect
  4871. * this instruction.
  4872. *
  4873. * **Operations**:\n
  4874. * ~~~
  4875. * if (Rs2[4:0] < 0) {
  4876. * sa = -Rs2[4:0];
  4877. * sa = (sa == 16)? 15 : sa;
  4878. * if (`.u` form) {
  4879. * res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1;
  4880. * Rd.H[x] = res[15:0];
  4881. * } else {
  4882. * Rd.H[x] = SE16(Rs1.H[x][15:sa]);
  4883. * }
  4884. * } else {
  4885. * sa = Rs2[3:0];
  4886. * res[(15+sa):0] = Rs1.H[x] <<(logic) sa;
  4887. * if (res > (2^15)-1) {
  4888. * res[15:0] = 0x7fff; OV = 1;
  4889. * } else if (res < -2^15) {
  4890. * res[15:0] = 0x8000; OV = 1;
  4891. * }
  4892. * Rd.H[x] = res[15:0];
  4893. * }
  4894. * for RV32: x=1...0,
  4895. * for RV64: x=3...0
  4896. * ~~~
  4897. *
  4898. * \param [in] a unsigned long type of value stored in a
  4899. * \param [in] b int type of value stored in b
  4900. * \return value stored in unsigned long type
  4901. */
  4902. __STATIC_FORCEINLINE unsigned long __RV_KSLRA16(unsigned long a, int b)
  4903. {
  4904. unsigned long result;
  4905. __ASM volatile("kslra16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  4906. return result;
  4907. }
  4908. /* ===== Inline Function End for 3.60.1. KSLRA16 ===== */
  4909. /* ===== Inline Function Start for 3.60.2. KSLRA16.u ===== */
  4910. /**
  4911. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
  4912. * \brief KSLRA16.u (SIMD 16-bit Shift Left Logical with Saturation or Rounding Shift Right Arithmetic)
  4913. * \details
  4914. * **Type**: SIMD
  4915. *
  4916. * **Syntax**:\n
  4917. * ~~~
  4918. * KSLRA16 Rd, Rs1, Rs2
  4919. * KSLRA16.u Rd, Rs1, Rs2
  4920. * ~~~
  4921. *
  4922. * **Purpose**:\n
  4923. * Do 16-bit elements logical left (positive) or arithmetic right (negative) shift operation with
  4924. * Q15 saturation for the left shift. The `.u` form performs additional rounding up operations for the
  4925. * right shift.
  4926. *
  4927. * **Description**:\n
  4928. * The 16-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
  4929. * based on the value of Rs2[4:0]. Rs2[4:0] is in the signed range of [-2^4, 2^4-1]. A positive Rs2[4:0] means
  4930. * logical left shift and a negative Rs2[4:0] means arithmetic right shift. The shift amount is the
  4931. * absolute value of Rs2[4:0]. However, the behavior of `Rs2[4:0]==-2^4 (0x10)` is defined to be
  4932. * equivalent to the behavior of `Rs2[4:0]==-(2^4-1) (0x11)`.
  4933. * The left-shifted results are saturated to the 16-bit signed integer range of [-2^15, 2^15-1]. For the `.u`
  4934. * form of the instruction, the right-shifted results are added a 1 to the most significant discarded bit
  4935. * position for rounding effect. After the shift, saturation, or rounding, the final results are written to
  4936. * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:5] will not affect
  4937. * this instruction.
  4938. *
  4939. * **Operations**:\n
  4940. * ~~~
  4941. * if (Rs2[4:0] < 0) {
  4942. * sa = -Rs2[4:0];
  4943. * sa = (sa == 16)? 15 : sa;
  4944. * if (`.u` form) {
  4945. * res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1;
  4946. * Rd.H[x] = res[15:0];
  4947. * } else {
  4948. * Rd.H[x] = SE16(Rs1.H[x][15:sa]);
  4949. * }
  4950. * } else {
  4951. * sa = Rs2[3:0];
  4952. * res[(15+sa):0] = Rs1.H[x] <<(logic) sa;
  4953. * if (res > (2^15)-1) {
  4954. * res[15:0] = 0x7fff; OV = 1;
  4955. * } else if (res < -2^15) {
  4956. * res[15:0] = 0x8000; OV = 1;
  4957. * }
  4958. * Rd.H[x] = res[15:0];
  4959. * }
  4960. * for RV32: x=1...0,
  4961. * for RV64: x=3...0
  4962. * ~~~
  4963. *
  4964. * \param [in] a unsigned long type of value stored in a
  4965. * \param [in] b int type of value stored in b
  4966. * \return value stored in unsigned long type
  4967. */
  4968. __STATIC_FORCEINLINE unsigned long __RV_KSLRA16_U(unsigned long a, int b)
  4969. {
  4970. unsigned long result;
  4971. __ASM volatile("kslra16.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  4972. return result;
  4973. }
  4974. /* ===== Inline Function End for 3.60.2. KSLRA16.u ===== */
  4975. /* ===== Inline Function Start for 3.61. KSLRAW ===== */
  4976. /**
  4977. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
  4978. * \brief KSLRAW (Shift Left Logical with Q31 Saturation or Shift Right Arithmetic)
  4979. * \details
  4980. * **Type**: DSP
  4981. *
  4982. * **Syntax**:\n
  4983. * ~~~
  4984. * KSLRAW Rd, Rs1, Rs2
  4985. * ~~~
  4986. *
  4987. * **Purpose**:\n
  4988. * Perform a logical left (positive) or arithmetic right (negative) shift operation with Q31
  4989. * saturation for the left shift on a 32-bit data.
  4990. *
  4991. * **Description**:\n
  4992. * The lower 32-bit content of Rs1 is left-shifted logically or right-shifted arithmetically
  4993. * based on the value of Rs2[5:0]. Rs2[5:0] is in the signed range of [-2^5, 2^5-1]. A positive Rs2[5:0] means
  4994. * logical left shift and a negative Rs2[5:0] means arithmetic right shift. The shift amount is the
  4995. * absolute value of Rs2[5:0] clamped to the actual shift range of [0, 31].
  4996. * The left-shifted result is saturated to the 32-bit signed integer range of [-2^31, 2^31-1]. After the shift
  4997. * operation, the final result is bit-31 sign-extended and written to Rd. If any saturation happens, this
  4998. * instruction sets the OV flag. The value of Rs2[31:6] will not affect the operation of this instruction.
  4999. *
  5000. * **Operations**:\n
  5001. * ~~~
  5002. * if (Rs2[5:0] < 0) {
  5003. * sa = -Rs2[5:0];
  5004. * sa = (sa == 32)? 31 : sa;
  5005. * res[31:0] = Rs1.W[0] >>(arith) sa;
  5006. * } else {
  5007. * sa = Rs2[5:0];
  5008. * tmp = Rs1.W[0] <<(logic) sa;
  5009. * if (tmp > (2^31)-1) {
  5010. * res[31:0] = (2^31)-1;
  5011. * OV = 1;
  5012. * } else if (tmp < -2^31) {
  5013. * res[31:0] = -2^31;
  5014. * OV = 1;
  5015. * } else {
  5016. * res[31:0] = tmp[31:0];
  5017. * }
  5018. * }
  5019. * Rd = res[31:0]; // RV32
  5020. * Rd = SE64(res[31:0]); // RV64
  5021. * ~~~
  5022. *
  5023. * \param [in] a int type of value stored in a
  5024. * \param [in] b int type of value stored in b
  5025. * \return value stored in long type
  5026. */
  5027. __STATIC_FORCEINLINE long __RV_KSLRAW(int a, int b)
  5028. {
  5029. long result;
  5030. __ASM volatile("kslraw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  5031. return result;
  5032. }
  5033. /* ===== Inline Function End for 3.61. KSLRAW ===== */
  5034. /* ===== Inline Function Start for 3.62. KSLRAW.u ===== */
  5035. /**
  5036. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
  5037. * \brief KSLRAW.u (Shift Left Logical with Q31 Saturation or Rounding Shift Right Arithmetic)
  5038. * \details
  5039. * **Type**: DSP
  5040. *
  5041. * **Syntax**:\n
  5042. * ~~~
  5043. * KSLRAW.u Rd, Rs1, Rs2
  5044. * ~~~
  5045. *
  5046. * **Purpose**:\n
  5047. * Perform a logical left (positive) or arithmetic right (negative) shift operation with Q31
  5048. * saturation for the left shift and a rounding up operation for the right shift on a 32-bit data.
  5049. *
  5050. * **Description**:\n
  5051. * The lower 32-bit content of Rs1 is left-shifted logically or right-shifted arithmetically
  5052. * based on the value of Rs2[5:0]. Rs2[5:0] is in the signed range of [-2^5, 2^5-1]. A positive Rs2[5:0] means
  5053. * logical left shift and a negative Rs2[5:0] means arithmetic right shift. The shift amount is the
  5054. * absolute value of Rs2[5:0] clamped to the actual shift range of [0, 31].
  5055. * The left-shifted result is saturated to the 32-bit signed integer range of [-2^31, 2^31-1]. The right-shifted
  5056. * result is added a 1 to the most significant discarded bit position for rounding effect. After the shift,
  5057. * saturation, or rounding, the final result is bit-31 sign-extended and written to Rd. If any saturation
  5058. * happens, this instruction sets the OV flag. The value of Rs2[31:6] will not affect the operation of this
  5059. * instruction.
  5060. *
  5061. * **Operations**:\n
  5062. * ~~~
  5063. * if (Rs2[5:0] < 0) {
  5064. * sa = -Rs2[5:0];
  5065. * sa = (sa == 32)? 31 : sa;
  5066. * res[31:-1] = SE33(Rs1[31:(sa-1)]) + 1;
  5067. * rst[31:0] = res[31:0];
  5068. * } else {
  5069. * sa = Rs2[5:0];
  5070. * tmp = Rs1.W[0] <<(logic) sa;
  5071. * if (tmp > (2^31)-1) {
  5072. * rst[31:0] = (2^31)-1;
  5073. * OV = 1;
  5074. * } else if (tmp < -2^31) {
  5075. * rst[31:0] = -2^31;
  5076. * OV = 1;
  5077. * } else {
  5078. * rst[31:0] = tmp[31:0];
  5079. * }
  5080. * }
  5081. * Rd = rst[31:0]; // RV32
  5082. * Rd = SE64(rst[31:0]); // RV64
  5083. * ~~~
  5084. *
  5085. * \param [in] a int type of value stored in a
  5086. * \param [in] b int type of value stored in b
  5087. * \return value stored in long type
  5088. */
  5089. __STATIC_FORCEINLINE long __RV_KSLRAW_U(int a, int b)
  5090. {
  5091. long result;
  5092. __ASM volatile("kslraw.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  5093. return result;
  5094. }
  5095. /* ===== Inline Function End for 3.62. KSLRAW.u ===== */
  5096. /* ===== Inline Function Start for 3.63. KSTAS16 ===== */
  5097. /**
  5098. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  5099. * \brief KSTAS16 (SIMD 16-bit Signed Saturating Straight Addition & Subtraction)
  5100. * \details
  5101. * **Type**: SIMD
  5102. *
  5103. * **Syntax**:\n
  5104. * ~~~
  5105. * KSTAS16 Rd, Rs1, Rs2
  5106. * ~~~
  5107. *
  5108. * **Purpose**:\n
  5109. * Do 16-bit signed integer element saturating addition and 16-bit signed integer element
  5110. * saturating subtraction in a 32-bit chunk simultaneously. Operands are from corresponding
  5111. * positions in 32-bit chunks.
  5112. *
  5113. * **Description**:\n
  5114. * This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in
  5115. * Rs1 with the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs2; at the same time, it
  5116. * subtracts the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs2 from the 16-bit signed
  5117. * integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the Q15 number
  5118. * range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated
  5119. * results are written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit chunks in Rd for
  5120. * subtraction.
  5121. *
  5122. * **Operations**:\n
  5123. * ~~~
  5124. * res1 = Rs1.W[x][31:16] + Rs2.W[x][31:16];
  5125. * res2 = Rs1.W[x][15:0] - Rs2.W[x][15:0];
  5126. * for (res in [res1, res2]) {
  5127. * if (res > (2^15)-1) {
  5128. * res = (2^15)-1;
  5129. * OV = 1;
  5130. * } else if (res < -2^15) {
  5131. * res = -2^15;
  5132. * OV = 1;
  5133. * }
  5134. * }
  5135. * Rd.W[x][31:16] = res1;
  5136. * Rd.W[x][15:0] = res2;
  5137. * for RV32, x=0
  5138. * for RV64, x=1...0
  5139. * ~~~
  5140. *
  5141. * \param [in] a unsigned long type of value stored in a
  5142. * \param [in] b unsigned long type of value stored in b
  5143. * \return value stored in unsigned long type
  5144. */
  5145. __STATIC_FORCEINLINE unsigned long __RV_KSTAS16(unsigned long a, unsigned long b)
  5146. {
  5147. unsigned long result;
  5148. __ASM volatile("kstas16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  5149. return result;
  5150. }
  5151. /* ===== Inline Function End for 3.63. KSTAS16 ===== */
  5152. /* ===== Inline Function Start for 3.64. KSTSA16 ===== */
  5153. /**
  5154. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  5155. * \brief KSTSA16 (SIMD 16-bit Signed Saturating Straight Subtraction & Addition)
  5156. * \details
  5157. * **Type**: SIMD
  5158. *
  5159. * **Syntax**:\n
  5160. * ~~~
  5161. * KSTSA16 Rd, Rs1, Rs2
  5162. * ~~~
  5163. *
  5164. * **Purpose**:\n
  5165. * Do 16-bit signed integer element saturating subtraction and 16-bit signed integer element
  5166. * saturating addition in a 32-bit chunk simultaneously. Operands are from corresponding positions in
  5167. * 32-bit chunks.
  5168. *
  5169. * **Description**:\n
  5170. * This instruction subtracts the 16-bit signed integer element in [31:16] of 32-bit chunks
  5171. * in Rs2 from the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1; at the same time, it
  5172. * adds the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs2 with the 16-bit signed integer
  5173. * element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the Q15 number range (-2^15
  5174. * <= Q15 <= 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated results are
  5175. * written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of 32-bit chunks in Rd for
  5176. * addition.
  5177. *
  5178. * **Operations**:\n
  5179. * ~~~
  5180. * res1 = Rs1.W[x][31:16] - Rs2.W[x][31:16];
  5181. * res2 = Rs1.W[x][15:0] + Rs2.W[x][15:0];
  5182. * for (res in [res1, res2]) {
  5183. * if (res > (2^15)-1) {
  5184. * res = (2^15)-1;
  5185. * OV = 1;
  5186. * } else if (res < -2^15) {
  5187. * res = -2^15;
  5188. * OV = 1;
  5189. * }
  5190. * }
  5191. * Rd.W[x][31:16] = res1;
  5192. * Rd.W[x][15:0] = res2;
  5193. * for RV32, x=0
  5194. * for RV64, x=1...0
  5195. * ~~~
  5196. *
  5197. * \param [in] a unsigned long type of value stored in a
  5198. * \param [in] b unsigned long type of value stored in b
  5199. * \return value stored in unsigned long type
  5200. */
  5201. __STATIC_FORCEINLINE unsigned long __RV_KSTSA16(unsigned long a, unsigned long b)
  5202. {
  5203. unsigned long result;
  5204. __ASM volatile("kstsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  5205. return result;
  5206. }
  5207. /* ===== Inline Function End for 3.64. KSTSA16 ===== */
  5208. /* ===== Inline Function Start for 3.65. KSUB8 ===== */
  5209. /**
  5210. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
  5211. * \brief KSUB8 (SIMD 8-bit Signed Saturating Subtraction)
  5212. * \details
  5213. * **Type**: SIMD
  5214. *
  5215. * **Syntax**:\n
  5216. * ~~~
  5217. * KSUB8 Rd, Rs1, Rs2
  5218. * ~~~
  5219. *
  5220. * **Purpose**:\n
  5221. * Do 8-bit signed elements saturating subtractions simultaneously.
  5222. *
  5223. * **Description**:\n
  5224. * This instruction subtracts the 8-bit signed integer elements in Rs2 from the 8-bit
  5225. * signed integer elements in Rs1. If any of the results are beyond the Q7 number range (-2^7 <= Q7 <=
  5226. * 2^7-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
  5227. *
  5228. * **Operations**:\n
  5229. * ~~~
  5230. * res[x] = Rs1.B[x] - Rs2.B[x];
  5231. * if (res[x] > (2^7)-1) {
  5232. * res[x] = (2^7)-1;
  5233. * OV = 1;
  5234. * } else if (res[x] < -2^7) {
  5235. * res[x] = -2^7;
  5236. * OV = 1;
  5237. * }
  5238. * Rd.B[x] = res[x];
  5239. * for RV32: x=3...0,
  5240. * for RV64: x=7...0
  5241. * ~~~
  5242. *
  5243. * \param [in] a unsigned long type of value stored in a
  5244. * \param [in] b unsigned long type of value stored in b
  5245. * \return value stored in unsigned long type
  5246. */
  5247. __STATIC_FORCEINLINE unsigned long __RV_KSUB8(unsigned long a, unsigned long b)
  5248. {
  5249. unsigned long result;
  5250. __ASM volatile("ksub8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  5251. return result;
  5252. }
  5253. /* ===== Inline Function End for 3.65. KSUB8 ===== */
  5254. /* ===== Inline Function Start for 3.66. KSUB16 ===== */
  5255. /**
  5256. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  5257. * \brief KSUB16 (SIMD 16-bit Signed Saturating Subtraction)
  5258. * \details
  5259. * **Type**: SIMD
  5260. *
  5261. * **Syntax**:\n
  5262. * ~~~
  5263. * KSUB16 Rd, Rs1, Rs2
  5264. * ~~~
  5265. *
  5266. * **Purpose**:\n
  5267. * Do 16-bit signed integer elements saturating subtractions simultaneously.
  5268. *
  5269. * **Description**:\n
  5270. * This instruction subtracts the 16-bit signed integer elements in Rs2 from the 16-bit
  5271. * signed integer elements in Rs1. If any of the results are beyond the Q15 number range (-2^15 <= Q15 <=
  5272. * 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to
  5273. * Rd.
  5274. *
  5275. * **Operations**:\n
  5276. * ~~~
  5277. * res[x] = Rs1.H[x] - Rs2.H[x];
  5278. * if (res[x] > (2^15)-1) {
  5279. * res[x] = (2^15)-1;
  5280. * OV = 1;
  5281. * } else if (res[x] < -2^15) {
  5282. * res[x] = -2^15;
  5283. * OV = 1;
  5284. * }
  5285. * Rd.H[x] = res[x];
  5286. * for RV32: x=1...0,
  5287. * for RV64: x=3...0
  5288. * ~~~
  5289. *
  5290. * \param [in] a unsigned long type of value stored in a
  5291. * \param [in] b unsigned long type of value stored in b
  5292. * \return value stored in unsigned long type
  5293. */
  5294. __STATIC_FORCEINLINE unsigned long __RV_KSUB16(unsigned long a, unsigned long b)
  5295. {
  5296. unsigned long result;
  5297. __ASM volatile("ksub16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  5298. return result;
  5299. }
  5300. /* ===== Inline Function End for 3.66. KSUB16 ===== */
  5301. /* ===== Inline Function Start for 3.67. KSUB64 ===== */
  5302. /**
  5303. * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
  5304. * \brief KSUB64 (64-bit Signed Saturating Subtraction)
  5305. * \details
  5306. * **Type**: DSP (64-bit Profile)
  5307. *
  5308. * **Syntax**:\n
  5309. * ~~~
  5310. * KSUB64 Rd, Rs1, Rs2
  5311. * ~~~
  5312. *
  5313. * **Purpose**:\n
  5314. * Perform a 64-bit signed integer subtraction. The result is saturated to the Q63 range.
  5315. *
  5316. * **RV32 Description**:\n
  5317. * This instruction subtracts the 64-bit signed integer of an even/odd pair of
  5318. * registers specified by Rs2(4,1) from the 64-bit signed integer of an even/odd pair of registers
  5319. * specified by Rs1(4,1). If the 64-bit result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is
  5320. * saturated to the range and the OV bit is set to 1. The saturated result is then written to an even/odd
  5321. * pair of registers specified by Rd(4,1).
  5322. * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
  5323. * includes register 2d and 2d+1.
  5324. * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
  5325. * register of the pair contains the low 32-bit of the operand.
  5326. *
  5327. * **RV64 Description**:\n
  5328. * This instruction subtracts the 64-bit signed integer of Rs2 from the 64-bit signed
  5329. * integer of Rs1. If the 64-bit result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated
  5330. * to the range and the OV bit is set to 1. The saturated result is then written to Rd.
  5331. *
  5332. * **Operations**:\n
  5333. * ~~~
  5334. * RV32:
  5335. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  5336. * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
  5337. * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
  5338. * result = R[a_H].R[a_L] - R[b_H].R[b_L];
  5339. * if (result > (2^63)-1) {
  5340. * result = (2^63)-1; OV = 1;
  5341. * } else if (result < -2^63) {
  5342. * result = -2^63; OV = 1;
  5343. * }
  5344. * R[t_H].R[t_L] = result;
  5345. * RV64:
  5346. * result = Rs1 - Rs2;
  5347. * if (result > (2^63)-1) {
  5348. * result = (2^63)-1; OV = 1;
  5349. * } else if (result < -2^63) {
  5350. * result = -2^63; OV = 1;
  5351. * }
  5352. * Rd = result;
  5353. * ~~~
  5354. *
  5355. * \param [in] a long long type of value stored in a
  5356. * \param [in] b long long type of value stored in b
  5357. * \return value stored in long long type
  5358. */
  5359. __STATIC_FORCEINLINE long long __RV_KSUB64(long long a, long long b)
  5360. {
  5361. long long result;
  5362. __ASM volatile("ksub64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  5363. return result;
  5364. }
  5365. /* ===== Inline Function End for 3.67. KSUB64 ===== */
  5366. /* ===== Inline Function Start for 3.68. KSUBH ===== */
  5367. /**
  5368. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
  5369. * \brief KSUBH (Signed Subtraction with Q15 Saturation)
  5370. * \details
  5371. * **Type**: DSP
  5372. *
  5373. * **Syntax**:\n
  5374. * ~~~
  5375. * KSUBH Rd, Rs1, Rs2
  5376. * ~~~
  5377. *
  5378. * **Purpose**:\n
  5379. * Subtract the signed lower 32-bit content of two registers with Q15 saturation.
  5380. *
  5381. * **Description**:\n
  5382. * The signed lower 32-bit content of Rs2 is subtracted from the signed lower 32-bit
  5383. * content of Rs1. And the result is saturated to the 16-bit signed integer range of [-2^15, 2^15-1] and then
  5384. * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag.
  5385. *
  5386. * **Operations**:\n
  5387. * ~~~
  5388. * tmp = Rs1.W[0] - Rs2.W[0];
  5389. * if (tmp > (2^15)-1) {
  5390. * res = (2^15)-1;
  5391. * OV = 1;
  5392. * } else if (tmp < -2^15) {
  5393. * res = -2^15;
  5394. * OV = 1;
  5395. * } else {
  5396. * res = tmp;
  5397. * }
  5398. * Rd = SE(res[15:0]);
  5399. * ~~~
  5400. *
  5401. * \param [in] a int type of value stored in a
  5402. * \param [in] b int type of value stored in b
  5403. * \return value stored in long type
  5404. */
  5405. __STATIC_FORCEINLINE long __RV_KSUBH(int a, int b)
  5406. {
  5407. long result;
  5408. __ASM volatile("ksubh %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  5409. return result;
  5410. }
  5411. /* ===== Inline Function End for 3.68. KSUBH ===== */
  5412. /* ===== Inline Function Start for 3.69. KSUBW ===== */
  5413. /**
  5414. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
  5415. * \brief KSUBW (Signed Subtraction with Q31 Saturation)
  5416. * \details
  5417. * **Type**: DSP
  5418. *
  5419. * **Syntax**:\n
  5420. * ~~~
  5421. * KSUBW Rd, Rs1, Rs2
  5422. * ~~~
  5423. *
  5424. * **Purpose**:\n
  5425. * Subtract the signed lower 32-bit content of two registers with Q31 saturation.
  5426. *
  5427. * **Description**:\n
  5428. * The signed lower 32-bit content of Rs2 is subtracted from the signed lower 32-bit
  5429. * content of Rs1. And the result is saturated to the 32-bit signed integer range of [-2^31, 2^31-1] and then
  5430. * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag.
  5431. *
  5432. * **Operations**:\n
  5433. * ~~~
  5434. * tmp = Rs1.W[0] - Rs2.W[0];
  5435. * if (tmp > (2^31)-1) {
  5436. * res = (2^31)-1;
  5437. * OV = 1;
  5438. * } else if (tmp < -2^31) {
  5439. * res = -2^31;
  5440. * OV = 1;
  5441. * } else {
  5442. * res = tmp;
  5443. * }
  5444. * Rd = res[31:0]; // RV32
  5445. * Rd = SE(res[31:0]); // RV64
  5446. * ~~~
  5447. *
  5448. * \param [in] a int type of value stored in a
  5449. * \param [in] b int type of value stored in b
  5450. * \return value stored in long type
  5451. */
  5452. __STATIC_FORCEINLINE long __RV_KSUBW(int a, int b)
  5453. {
  5454. long result;
  5455. __ASM volatile("ksubw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  5456. return result;
  5457. }
  5458. /* ===== Inline Function End for 3.69. KSUBW ===== */
  5459. /* ===== Inline Function Start for 3.70.1. KWMMUL ===== */
  5460. /**
  5461. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
  5462. * \brief KWMMUL (SIMD Saturating MSW Signed Multiply Word & Double)
  5463. * \details
  5464. * **Type**: SIMD
  5465. *
  5466. * **Syntax**:\n
  5467. * ~~~
  5468. * KWMMUL Rd, Rs1, Rs2
  5469. * KWMMUL.u Rd, Rs1, Rs2
  5470. * ~~~
  5471. *
  5472. * **Purpose**:\n
  5473. * Multiply the signed 32-bit integer elements of two registers, shift the results left 1-bit,
  5474. * saturate, and write the most significant 32-bit results to a register. The `.u` form additionally
  5475. * rounds up the multiplication results from the most significant discarded bit.
  5476. *
  5477. * **Description**:\n
  5478. * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2. It then shifts
  5479. * the multiplication results one bit to the left and takes the most significant 32-bit results. If the
  5480. * shifted result is greater than 2^31-1, it is saturated to 2^31-1 and the OV flag is set to 1. The final element
  5481. * result is written to Rd. The 32-bit elements of Rs1 and Rs2 are treated as signed integers. The `.u`
  5482. * form of the instruction additionally rounds up the 64-bit multiplication results by adding a 1 to bit
  5483. * 30 before the shift and saturation operations.
  5484. *
  5485. * **Operations**:\n
  5486. * ~~~
  5487. * if ((0x80000000 != Rs1.W[x]) | (0x80000000 != Rs2.W[x])) {
  5488. * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
  5489. * if (`.u` form) {
  5490. * Round[x][33:0] = Mres[x][63:30] + 1;
  5491. * Rd.W[x] = Round[x][32:1];
  5492. * } else {
  5493. * Rd.W[x] = Mres[x][62:31];
  5494. * }
  5495. * } else {
  5496. * Rd.W[x] = 0x7fffffff;
  5497. * OV = 1;
  5498. * }
  5499. * for RV32: x=0
  5500. * for RV64: x=1...0
  5501. * ~~~
  5502. *
  5503. * \param [in] a long type of value stored in a
  5504. * \param [in] b long type of value stored in b
  5505. * \return value stored in long type
  5506. */
  5507. __STATIC_FORCEINLINE long __RV_KWMMUL(long a, long b)
  5508. {
  5509. long result;
  5510. __ASM volatile("kwmmul %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  5511. return result;
  5512. }
  5513. /* ===== Inline Function End for 3.70.1. KWMMUL ===== */
  5514. /* ===== Inline Function Start for 3.70.2. KWMMUL.u ===== */
  5515. /**
  5516. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
  5517. * \brief KWMMUL.u (SIMD Saturating MSW Signed Multiply Word & Double with Rounding)
  5518. * \details
  5519. * **Type**: SIMD
  5520. *
  5521. * **Syntax**:\n
  5522. * ~~~
  5523. * KWMMUL Rd, Rs1, Rs2
  5524. * KWMMUL.u Rd, Rs1, Rs2
  5525. * ~~~
  5526. *
  5527. * **Purpose**:\n
  5528. * Multiply the signed 32-bit integer elements of two registers, shift the results left 1-bit,
  5529. * saturate, and write the most significant 32-bit results to a register. The `.u` form additionally
  5530. * rounds up the multiplication results from the most significant discarded bit.
  5531. *
  5532. * **Description**:\n
  5533. * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2. It then shifts
  5534. * the multiplication results one bit to the left and takes the most significant 32-bit results. If the
  5535. * shifted result is greater than 2^31-1, it is saturated to 2^31-1 and the OV flag is set to 1. The final element
  5536. * result is written to Rd. The 32-bit elements of Rs1 and Rs2 are treated as signed integers. The `.u`
  5537. * form of the instruction additionally rounds up the 64-bit multiplication results by adding a 1 to bit
  5538. * 30 before the shift and saturation operations.
  5539. *
  5540. * **Operations**:\n
  5541. * ~~~
  5542. * if ((0x80000000 != Rs1.W[x]) | (0x80000000 != Rs2.W[x])) {
  5543. * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
  5544. * if (`.u` form) {
  5545. * Round[x][33:0] = Mres[x][63:30] + 1;
  5546. * Rd.W[x] = Round[x][32:1];
  5547. * } else {
  5548. * Rd.W[x] = Mres[x][62:31];
  5549. * }
  5550. * } else {
  5551. * Rd.W[x] = 0x7fffffff;
  5552. * OV = 1;
  5553. * }
  5554. * for RV32: x=0
  5555. * for RV64: x=1...0
  5556. * ~~~
  5557. *
  5558. * \param [in] a long type of value stored in a
  5559. * \param [in] b long type of value stored in b
  5560. * \return value stored in long type
  5561. */
  5562. __STATIC_FORCEINLINE long __RV_KWMMUL_U(long a, long b)
  5563. {
  5564. long result;
  5565. __ASM volatile("kwmmul.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  5566. return result;
  5567. }
  5568. /* ===== Inline Function End for 3.70.2. KWMMUL.u ===== */
  5569. /* ===== Inline Function Start for 3.71. MADDR32 ===== */
  5570. /**
  5571. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
  5572. * \brief MADDR32 (Multiply and Add to 32-Bit Word)
  5573. * \details
  5574. * **Type**: DSP
  5575. *
  5576. * **Syntax**:\n
  5577. * ~~~
  5578. * MADDR32 Rd, Rs1, Rs2
  5579. * ~~~
  5580. *
  5581. * **Purpose**:\n
  5582. * Multiply the 32-bit contents of two registers and add the lower 32-bit multiplication result
  5583. * to the 32-bit content of a destination register. Write the final result back to the destination register.
  5584. *
  5585. * **Description**:\n
  5586. * This instruction multiplies the lower 32-bit content of Rs1 with that of Rs2. It adds the
  5587. * lower 32-bit multiplication result to the lower 32-bit content of Rd and writes the final result (RV32)
  5588. * or sign-extended result (RV64) back to Rd. The contents of Rs1 and Rs2 can be either signed or
  5589. * unsigned integers.
  5590. *
  5591. * **Operations**:\n
  5592. * ~~~
  5593. * RV32:
  5594. * Mresult = Rs1 * Rs2;
  5595. * Rd = Rd + Mresult.W[0];
  5596. * RV64:
  5597. * Mresult = Rs1.W[0] * Rs2.W[0];
  5598. * tres[31:0] = Rd.W[0] + Mresult.W[0];
  5599. * Rd = SE64(tres[31:0]);
  5600. * ~~~
  5601. *
  5602. * \param [in] t unsigned long type of value stored in t
  5603. * \param [in] a unsigned long type of value stored in a
  5604. * \param [in] b unsigned long type of value stored in b
  5605. * \return value stored in unsigned long type
  5606. */
  5607. __STATIC_FORCEINLINE unsigned long __RV_MADDR32(unsigned long t, unsigned long a, unsigned long b)
  5608. {
  5609. __ASM volatile("maddr32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  5610. return t;
  5611. }
  5612. /* ===== Inline Function End for 3.71. MADDR32 ===== */
  5613. /* ===== Inline Function Start for 3.72. MAXW ===== */
  5614. /**
  5615. * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
  5616. * \brief MAXW (32-bit Signed Word Maximum)
  5617. * \details
  5618. * **Type**: DSP
  5619. *
  5620. * **Syntax**:\n
  5621. * ~~~
  5622. * MAXW Rd, Rs1, Rs2
  5623. * ~~~
  5624. *
  5625. * **Purpose**:\n
  5626. * Get the larger value from the 32-bit contents of two general registers.
  5627. *
  5628. * **Description**:\n
  5629. * This instruction compares two signed 32-bit integers stored in Rs1 and Rs2, picks the
  5630. * larger value as the result, and writes the result to Rd.
  5631. *
  5632. * **Operations**:\n
  5633. * ~~~
  5634. * if (Rs1.W[0] >= Rs2.W[0]) {
  5635. * Rd = SE(Rs1.W[0]);
  5636. * } else {
  5637. * Rd = SE(Rs2.W[0]);
  5638. * }
  5639. * ~~~
  5640. *
  5641. * \param [in] a int type of value stored in a
  5642. * \param [in] b int type of value stored in b
  5643. * \return value stored in long type
  5644. */
  5645. __STATIC_FORCEINLINE long __RV_MAXW(int a, int b)
  5646. {
  5647. long result;
  5648. __ASM volatile("maxw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  5649. return result;
  5650. }
  5651. /* ===== Inline Function End for 3.72. MAXW ===== */
  5652. /* ===== Inline Function Start for 3.73. MINW ===== */
  5653. /**
  5654. * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
  5655. * \brief MINW (32-bit Signed Word Minimum)
  5656. * \details
  5657. * **Type**: DSP
  5658. *
  5659. * **Syntax**:\n
  5660. * ~~~
  5661. * MINW Rd, Rs1, Rs2
  5662. * ~~~
  5663. *
  5664. * **Purpose**:\n
  5665. * Get the smaller value from the 32-bit contents of two general registers.
  5666. *
  5667. * **Description**:\n
  5668. * This instruction compares two signed 32-bit integers stored in Rs1 and Rs2, picks the
  5669. * smaller value as the result, and writes the result to Rd.
  5670. *
  5671. * **Operations**:\n
  5672. * ~~~
  5673. * if (Rs1.W[0] >= Rs2.W[0]) { Rd = SE(Rs2.W[0]); } else { Rd = SE(Rs1.W[0]); }
  5674. * ~~~
  5675. *
  5676. * \param [in] a int type of value stored in a
  5677. * \param [in] b int type of value stored in b
  5678. * \return value stored in long type
  5679. */
  5680. __STATIC_FORCEINLINE long __RV_MINW(int a, int b)
  5681. {
  5682. long result;
  5683. __ASM volatile("minw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  5684. return result;
  5685. }
  5686. /* ===== Inline Function End for 3.73. MINW ===== */
  5687. /* ===== Inline Function Start for 3.74. MSUBR32 ===== */
  5688. /**
  5689. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
  5690. * \brief MSUBR32 (Multiply and Subtract from 32-Bit Word)
  5691. * \details
  5692. * **Type**: DSP
  5693. *
  5694. * **Syntax**:\n
  5695. * ~~~
  5696. * MSUBR32 Rd, Rs1, Rs2
  5697. * ~~~
  5698. *
  5699. * **Purpose**:\n
  5700. * Multiply the 32-bit contents of two registers and subtract the lower 32-bit multiplication
  5701. * result from the 32-bit content of a destination register. Write the final result back to the destination
  5702. * register.
  5703. *
  5704. * **Description**:\n
  5705. * This instruction multiplies the lower 32-bit content of Rs1 with that of Rs2, subtracts
  5706. * the lower 32-bit multiplication result from the lower 32-bit content of Rd, then writes the final
  5707. * result (RV32) or sign-extended result (RV64) back to Rd. The contents of Rs1 and Rs2 can be either
  5708. * signed or unsigned integers.
  5709. *
  5710. * **Operations**:\n
  5711. * ~~~
  5712. * RV32:
  5713. * Mresult = Rs1 * Rs2;
  5714. * Rd = Rd - Mresult.W[0];
  5715. * RV64:
  5716. * Mresult = Rs1.W[0] * Rs2.W[0];
  5717. * tres[31:0] = Rd.W[0] - Mresult.W[0];
  5718. * Rd = SE64(tres[31:0]);
  5719. * ~~~
  5720. *
  5721. * \param [in] t unsigned long type of value stored in t
  5722. * \param [in] a unsigned long type of value stored in a
  5723. * \param [in] b unsigned long type of value stored in b
  5724. * \return value stored in unsigned long type
  5725. */
  5726. __STATIC_FORCEINLINE unsigned long __RV_MSUBR32(unsigned long t, unsigned long a, unsigned long b)
  5727. {
  5728. __ASM volatile("msubr32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  5729. return t;
  5730. }
  5731. /* ===== Inline Function End for 3.74. MSUBR32 ===== */
  5732. /* ===== Inline Function Start for 3.75. MULR64 ===== */
  5733. /**
  5734. * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
  5735. * \brief MULR64 (Multiply Word Unsigned to 64-bit Data)
  5736. * \details
  5737. * **Type**: DSP
  5738. *
  5739. * **Syntax**:\n
  5740. * ~~~
  5741. * MULR64 Rd, Rs1, Rs2
  5742. * ~~~
  5743. *
  5744. * **Purpose**:\n
  5745. * Multiply the 32-bit unsigned integer contents of two registers and write the 64-bit result.
  5746. *
  5747. * **RV32 Description**:\n
  5748. * This instruction multiplies the 32-bit content of Rs1 with that of Rs2 and writes the 64-bit
  5749. * multiplication result to an even/odd pair of registers containing Rd. Rd(4,1) index d determines the
  5750. * even/odd pair group of the two registers. Specifically, the register pair includes register 2d and
  5751. * 2d+1.
  5752. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
  5753. * of the pair contains the low 32-bit of the result.
  5754. * The lower 32-bit contents of Rs1 and Rs2 are treated as unsigned integers.
  5755. *
  5756. * **RV64 Description**:\n
  5757. * This instruction multiplies the lower 32-bit content of Rs1 with that of Rs2 and writes the 64-bit
  5758. * multiplication result to Rd.
  5759. * The lower 32-bit contents of Rs1 and Rs2 are treated as unsigned integers.
  5760. *
  5761. * **Operations**:\n
  5762. * ~~~
  5763. * RV32:
  5764. * Mresult = CONCAT(1`b0,Rs1) u* CONCAT(1`b0,Rs2);
  5765. * R[Rd(4,1).1(0)][31:0] = Mresult[63:32];
  5766. * R[Rd(4,1).0(0)][31:0] = Mresult[31:0];
  5767. * RV64:
  5768. * Mresult = CONCAT(1`b0,Rs1.W[0]) u* CONCAT(1`b0,Rs2.W[0]);
  5769. * Rd = Mresult[63:0];
  5770. * ~~~
  5771. *
  5772. * \param [in] a unsigned long type of value stored in a
  5773. * \param [in] b unsigned long type of value stored in b
  5774. * \return value stored in unsigned long long type
  5775. */
  5776. __STATIC_FORCEINLINE unsigned long long __RV_MULR64(unsigned long a, unsigned long b)
  5777. {
  5778. unsigned long long result;
  5779. __ASM volatile("mulr64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  5780. return result;
  5781. }
  5782. /* ===== Inline Function End for 3.75. MULR64 ===== */
  5783. /* ===== Inline Function Start for 3.76. MULSR64 ===== */
  5784. /**
  5785. * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
  5786. * \brief MULSR64 (Multiply Word Signed to 64-bit Data)
  5787. * \details
  5788. * **Type**: DSP
  5789. *
  5790. * **Syntax**:\n
  5791. * ~~~
  5792. * MULSR64 Rd, Rs1, Rs2
  5793. * ~~~
  5794. *
  5795. * **Purpose**:\n
  5796. * Multiply the 32-bit signed integer contents of two registers and write the 64-bit result.
  5797. *
  5798. * **RV32 Description**:\n
  5799. * This instruction multiplies the lower 32-bit content of Rs1 with the lower 32-bit content of Rs2 and
  5800. * writes the 64-bit multiplication result to an even/odd pair of registers containing Rd. Rd(4,1) index d
  5801. * determines the even/odd pair group of the two registers. Specifically, the register pair includes
  5802. * register 2d and 2d+1.
  5803. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
  5804. * of the pair contains the low 32-bit of the result.
  5805. * The lower 32-bit contents of Rs1 and Rs2 are treated as signed integers.
  5806. *
  5807. * **RV64 Description**:\n
  5808. * This instruction multiplies the lower 32-bit content of Rs1 with the lower 32-bit content of Rs2 and
  5809. * writes the 64-bit multiplication result to Rd.
  5810. * The lower 32-bit contents of Rs1 and Rs2 are treated as signed integers.
  5811. *
  5812. * **Operations**:\n
  5813. * ~~~
  5814. * RV32:
  5815. * Mresult = Ra s* Rb;
  5816. * R[Rd(4,1).1(0)][31:0] = Mresult[63:32];
  5817. * R[Rd(4,1).0(0)][31:0] = Mresult[31:0];
  5818. * RV64:
  5819. * Mresult = Ra.W[0] s* Rb.W[0];
  5820. * Rd = Mresult[63:0];
  5821. * ~~~
  5822. *
  5823. * \param [in] a long type of value stored in a
  5824. * \param [in] b long type of value stored in b
  5825. * \return value stored in long long type
  5826. */
  5827. __STATIC_FORCEINLINE long long __RV_MULSR64(long a, long b)
  5828. {
  5829. long long result;
  5830. __ASM volatile("mulsr64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  5831. return result;
  5832. }
  5833. /* ===== Inline Function End for 3.76. MULSR64 ===== */
  5834. /* ===== Inline Function Start for 3.77. PBSAD ===== */
  5835. /**
  5836. * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
  5837. * \brief PBSAD (Parallel Byte Sum of Absolute Difference)
  5838. * \details
  5839. * **Type**: DSP
  5840. *
  5841. * **Syntax**:\n
  5842. * ~~~
  5843. * PBSAD Rd, Rs1, Rs2
  5844. * ~~~
  5845. *
  5846. * **Purpose**:\n
  5847. * Calculate the sum of absolute difference of unsigned 8-bit data elements.
  5848. *
  5849. * **Description**:\n
  5850. * This instruction subtracts the un-signed 8-bit elements of Rs2 from those of Rs1. Then
  5851. * it adds the absolute value of each difference together and writes the result to Rd.
  5852. *
  5853. * **Operations**:\n
  5854. * ~~~
  5855. * absdiff[x] = ABS(Rs1.B[x] - Rs2.B[x]);
  5856. * Rd = SUM(absdiff[x]);
  5857. * for RV32: x=3...0,
  5858. * for RV64: x=7...0
  5859. * ~~~
  5860. *
  5861. * \param [in] a unsigned long type of value stored in a
  5862. * \param [in] b unsigned long type of value stored in b
  5863. * \return value stored in unsigned long type
  5864. */
  5865. __STATIC_FORCEINLINE unsigned long __RV_PBSAD(unsigned long a, unsigned long b)
  5866. {
  5867. unsigned long result;
  5868. __ASM volatile("pbsad %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  5869. return result;
  5870. }
  5871. /* ===== Inline Function End for 3.77. PBSAD ===== */
  5872. /* ===== Inline Function Start for 3.78. PBSADA ===== */
  5873. /**
  5874. * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
  5875. * \brief PBSADA (Parallel Byte Sum of Absolute Difference Accum)
  5876. * \details
  5877. * **Type**: DSP
  5878. *
  5879. * **Syntax**:\n
  5880. * ~~~
  5881. * PBSADA Rd, Rs1, Rs2
  5882. * ~~~
  5883. *
  5884. * **Purpose**:\n
  5885. * Calculate the sum of absolute difference of four unsigned 8-bit data elements and
  5886. * accumulate it into a register.
  5887. *
  5888. * **Description**:\n
  5889. * This instruction subtracts the un-signed 8-bit elements of Rs2 from those of Rs1. It
  5890. * then adds the absolute value of each difference together along with the content of Rd and writes the
  5891. * accumulated result back to Rd.
  5892. *
  5893. * **Operations**:\n
  5894. * ~~~
  5895. * absdiff[x] = ABS(Rs1.B[x] - Rs2.B[x]);
  5896. * Rd = Rd + SUM(absdiff[x]);
  5897. * for RV32: x=3...0,
  5898. * for RV64: x=7...0
  5899. * ~~~
  5900. *
  5901. * \param [in] t unsigned long type of value stored in t
  5902. * \param [in] a unsigned long type of value stored in a
  5903. * \param [in] b unsigned long type of value stored in b
  5904. * \return value stored in unsigned long type
  5905. */
  5906. __STATIC_FORCEINLINE unsigned long __RV_PBSADA(unsigned long t, unsigned long a, unsigned long b)
  5907. {
  5908. __ASM volatile("pbsada %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  5909. return t;
  5910. }
  5911. /* ===== Inline Function End for 3.78. PBSADA ===== */
  5912. /* ===== Inline Function Start for 3.79.1. PKBB16 ===== */
  5913. /**
  5914. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_PACK
  5915. * \brief PKBB16 (Pack Two 16-bit Data from Both Bottom Half)
  5916. * \details
  5917. * **Type**: DSP
  5918. *
  5919. * **Syntax**:\n
  5920. * ~~~
  5921. * PKBB16 Rd, Rs1, Rs2
  5922. * PKBT16 Rd, Rs1, Rs2
  5923. * PKTT16 Rd, Rs1, Rs2
  5924. * PKTB16 Rd, Rs1, Rs2
  5925. * ~~~
  5926. *
  5927. * **Purpose**:\n
  5928. * Pack 16-bit data from 32-bit chunks in two registers.
  5929. * * PKBB16: bottom.bottom
  5930. * * PKBT16 bottom.top
  5931. * * PKTT16 top.top
  5932. * * PKTB16 top.bottom
  5933. *
  5934. * **Description**:\n
  5935. * (PKBB16) moves Rs1.W[x][15:0] to Rd.W[x][31:16] and moves Rs2.W[x] [15:0] to
  5936. * Rd.W[x] [15:0].
  5937. * (PKBT16) moves Rs1.W[x] [15:0] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
  5938. * (PKTT16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
  5939. * (PKTB16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0].
  5940. *
  5941. * **Operations**:\n
  5942. * ~~~
  5943. * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][15:0]); // PKBB16
  5944. * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][31:16]); // PKBT16
  5945. * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][15:0]); // PKTB16
  5946. * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][31:16]); // PKTT16
  5947. * for RV32: x=0,
  5948. * for RV64: x=1...0
  5949. * ~~~
  5950. *
  5951. * \param [in] a unsigned long type of value stored in a
  5952. * \param [in] b unsigned long type of value stored in b
  5953. * \return value stored in unsigned long type
  5954. */
  5955. __STATIC_FORCEINLINE unsigned long __RV_PKBB16(unsigned long a, unsigned long b)
  5956. {
  5957. unsigned long result;
  5958. __ASM volatile("pkbb16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  5959. return result;
  5960. }
  5961. /* ===== Inline Function End for 3.79.1. PKBB16 ===== */
  5962. /* ===== Inline Function Start for 3.79.2. PKBT16 ===== */
  5963. /**
  5964. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_PACK
  5965. * \brief PKBT16 (Pack Two 16-bit Data from Bottom and Top Half)
  5966. * \details
  5967. * **Type**: DSP
  5968. *
  5969. * **Syntax**:\n
  5970. * ~~~
  5971. * PKBB16 Rd, Rs1, Rs2
  5972. * PKBT16 Rd, Rs1, Rs2
  5973. * PKTT16 Rd, Rs1, Rs2
  5974. * PKTB16 Rd, Rs1, Rs2
  5975. * ~~~
  5976. *
  5977. * **Purpose**:\n
  5978. * Pack 16-bit data from 32-bit chunks in two registers.
  5979. * * PKBB16: bottom.bottom
  5980. * * PKBT16 bottom.top
  5981. * * PKTT16 top.top
  5982. * * PKTB16 top.bottom
  5983. *
  5984. * **Description**:\n
  5985. * (PKBB16) moves Rs1.W[x][15:0] to Rd.W[x][31:16] and moves Rs2.W[x] [15:0] to
  5986. * Rd.W[x] [15:0].
  5987. * (PKBT16) moves Rs1.W[x] [15:0] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
  5988. * (PKTT16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
  5989. * (PKTB16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0].
  5990. *
  5991. * **Operations**:\n
  5992. * ~~~
  5993. * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][15:0]); // PKBB16
  5994. * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][31:16]); // PKBT16
  5995. * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][15:0]); // PKTB16
  5996. * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][31:16]); // PKTT16
  5997. * for RV32: x=0,
  5998. * for RV64: x=1...0
  5999. * ~~~
  6000. *
  6001. * \param [in] a unsigned long type of value stored in a
  6002. * \param [in] b unsigned long type of value stored in b
  6003. * \return value stored in unsigned long type
  6004. */
  6005. __STATIC_FORCEINLINE unsigned long __RV_PKBT16(unsigned long a, unsigned long b)
  6006. {
  6007. unsigned long result;
  6008. __ASM volatile("pkbt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  6009. return result;
  6010. }
  6011. /* ===== Inline Function End for 3.79.2. PKBT16 ===== */
  6012. /* ===== Inline Function Start for 3.79.3. PKTT16 ===== */
  6013. /**
  6014. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_PACK
  6015. * \brief PKTT16 (Pack Two 16-bit Data from Both Top Half)
  6016. * \details
  6017. * **Type**: DSP
  6018. *
  6019. * **Syntax**:\n
  6020. * ~~~
  6021. * PKBB16 Rd, Rs1, Rs2
  6022. * PKBT16 Rd, Rs1, Rs2
  6023. * PKTT16 Rd, Rs1, Rs2
  6024. * PKTB16 Rd, Rs1, Rs2
  6025. * ~~~
  6026. *
  6027. * **Purpose**:\n
  6028. * Pack 16-bit data from 32-bit chunks in two registers.
  6029. * * PKBB16: bottom.bottom
  6030. * * PKBT16 bottom.top
  6031. * * PKTT16 top.top
  6032. * * PKTB16 top.bottom
  6033. *
  6034. * **Description**:\n
  6035. * (PKBB16) moves Rs1.W[x][15:0] to Rd.W[x][31:16] and moves Rs2.W[x] [15:0] to
  6036. * Rd.W[x] [15:0].
  6037. * (PKBT16) moves Rs1.W[x] [15:0] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
  6038. * (PKTT16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
  6039. * (PKTB16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0].
  6040. *
  6041. * **Operations**:\n
  6042. * ~~~
  6043. * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][15:0]); // PKBB16
  6044. * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][31:16]); // PKBT16
  6045. * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][15:0]); // PKTB16
  6046. * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][31:16]); // PKTT16
  6047. * for RV32: x=0,
  6048. * for RV64: x=1...0
  6049. * ~~~
  6050. *
  6051. * \param [in] a unsigned long type of value stored in a
  6052. * \param [in] b unsigned long type of value stored in b
  6053. * \return value stored in unsigned long type
  6054. */
  6055. __STATIC_FORCEINLINE unsigned long __RV_PKTT16(unsigned long a, unsigned long b)
  6056. {
  6057. unsigned long result;
  6058. __ASM volatile("pktt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  6059. return result;
  6060. }
  6061. /* ===== Inline Function End for 3.79.3. PKTT16 ===== */
  6062. /* ===== Inline Function Start for 3.79.4. PKTB16 ===== */
  6063. /**
  6064. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_PACK
  6065. * \brief PKTB16 (Pack Two 16-bit Data from Top and Bottom Half)
  6066. * \details
  6067. * **Type**: DSP
  6068. *
  6069. * **Syntax**:\n
  6070. * ~~~
  6071. * PKBB16 Rd, Rs1, Rs2
  6072. * PKBT16 Rd, Rs1, Rs2
  6073. * PKTT16 Rd, Rs1, Rs2
  6074. * PKTB16 Rd, Rs1, Rs2
  6075. * ~~~
  6076. *
  6077. * **Purpose**:\n
  6078. * Pack 16-bit data from 32-bit chunks in two registers.
  6079. * * PKBB16: bottom.bottom
  6080. * * PKBT16 bottom.top
  6081. * * PKTT16 top.top
  6082. * * PKTB16 top.bottom
  6083. *
  6084. * **Description**:\n
  6085. * (PKBB16) moves Rs1.W[x][15:0] to Rd.W[x][31:16] and moves Rs2.W[x] [15:0] to
  6086. * Rd.W[x] [15:0].
  6087. * (PKBT16) moves Rs1.W[x] [15:0] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
  6088. * (PKTT16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
  6089. * (PKTB16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0].
  6090. *
  6091. * **Operations**:\n
  6092. * ~~~
  6093. * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][15:0]); // PKBB16
  6094. * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][31:16]); // PKBT16
  6095. * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][15:0]); // PKTB16
  6096. * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][31:16]); // PKTT16
  6097. * for RV32: x=0,
  6098. * for RV64: x=1...0
  6099. * ~~~
  6100. *
  6101. * \param [in] a unsigned long type of value stored in a
  6102. * \param [in] b unsigned long type of value stored in b
  6103. * \return value stored in unsigned long type
  6104. */
  6105. __STATIC_FORCEINLINE unsigned long __RV_PKTB16(unsigned long a, unsigned long b)
  6106. {
  6107. unsigned long result;
  6108. __ASM volatile("pktb16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  6109. return result;
  6110. }
  6111. /* ===== Inline Function End for 3.79.4. PKTB16 ===== */
  6112. /* ===== Inline Function Start for 3.80. RADD8 ===== */
  6113. /**
  6114. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
  6115. * \brief RADD8 (SIMD 8-bit Signed Halving Addition)
  6116. * \details
  6117. * **Type**: SIMD
  6118. *
  6119. * **Syntax**:\n
  6120. * ~~~
  6121. * RADD8 Rd, Rs1, Rs2
  6122. * ~~~
  6123. *
  6124. * **Purpose**:\n
  6125. * Do 8-bit signed integer element additions simultaneously. The element results are halved
  6126. * to avoid overflow or saturation.
  6127. *
  6128. * **Description**:\n
  6129. * This instruction adds the 8-bit signed integer elements in Rs1 with the 8-bit signed
  6130. * integer elements in Rs2. The results are first arithmetically right-shifted by 1 bit and then written to
  6131. * Rd.
  6132. *
  6133. * **Examples**:\n
  6134. * ~~~
  6135. * * Rs1 = 0x7F, Rs2 = 0x7F, Rd = 0x7F
  6136. * * Rs1 = 0x80, Rs2 = 0x80, Rd = 0x80
  6137. * * Rs1 = 0x40, Rs2 = 0x80, Rd = 0xE0
  6138. * ~~~
  6139. *
  6140. * **Operations**:\n
  6141. * ~~~
  6142. * Rd.B[x] = (Rs1.B[x] + Rs2.B[x]) s>> 1; for RV32: x=3...0, for RV64: x=7...0
  6143. * ~~~
  6144. *
  6145. * \param [in] a unsigned long type of value stored in a
  6146. * \param [in] b unsigned long type of value stored in b
  6147. * \return value stored in unsigned long type
  6148. */
  6149. __STATIC_FORCEINLINE unsigned long __RV_RADD8(unsigned long a, unsigned long b)
  6150. {
  6151. unsigned long result;
  6152. __ASM volatile("radd8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  6153. return result;
  6154. }
  6155. /* ===== Inline Function End for 3.80. RADD8 ===== */
  6156. /* ===== Inline Function Start for 3.81. RADD16 ===== */
  6157. /**
  6158. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  6159. * \brief RADD16 (SIMD 16-bit Signed Halving Addition)
  6160. * \details
  6161. * **Type**: SIMD
  6162. *
  6163. * **Syntax**:\n
  6164. * ~~~
  6165. * RADD16 Rd, Rs1, Rs2
  6166. * ~~~
  6167. *
  6168. * **Purpose**:\n
  6169. * Do 16-bit signed integer element additions simultaneously. The results are halved to avoid
  6170. * overflow or saturation.
  6171. *
  6172. * **Description**:\n
  6173. * This instruction adds the 16-bit signed integer elements in Rs1 with the 16-bit signed
  6174. * integer elements in Rs2. The results are first arithmetically right-shifted by 1 bit and then written to
  6175. * Rd.
  6176. *
  6177. * **Examples**:\n
  6178. * ~~~
  6179. * * Rs1 = 0x7FFF, Rs2 = 0x7FFF, Rd = 0x7FFF
  6180. * * Rs1 = 0x8000, Rs2 = 0x8000, Rd = 0x8000
  6181. * * Rs1 = 0x4000, Rs2 = 0x8000, Rd = 0xE000
  6182. * ~~~
  6183. *
  6184. * **Operations**:\n
  6185. * ~~~
  6186. * Rd.H[x] = (Rs1.H[x] + Rs2.H[x]) s>> 1; for RV32: x=1...0, for RV64: x=3...0
  6187. * ~~~
  6188. *
  6189. * \param [in] a unsigned long type of value stored in a
  6190. * \param [in] b unsigned long type of value stored in b
  6191. * \return value stored in unsigned long type
  6192. */
  6193. __STATIC_FORCEINLINE unsigned long __RV_RADD16(unsigned long a, unsigned long b)
  6194. {
  6195. unsigned long result;
  6196. __ASM volatile("radd16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  6197. return result;
  6198. }
  6199. /* ===== Inline Function End for 3.81. RADD16 ===== */
  6200. /* ===== Inline Function Start for 3.82. RADD64 ===== */
  6201. /**
  6202. * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
  6203. * \brief RADD64 (64-bit Signed Halving Addition)
  6204. * \details
  6205. * **Type**: DSP (64-bit Profile)
  6206. *
  6207. * **Syntax**:\n
  6208. * ~~~
  6209. * RADD64 Rd, Rs1, Rs2
  6210. * ~~~
  6211. *
  6212. * **Purpose**:\n
  6213. * Add two 64-bit signed integers. The result is halved to avoid overflow or saturation.
  6214. *
  6215. * **RV32 Description**:\n
  6216. * This instruction adds the 64-bit signed integer of an even/odd pair of registers
  6217. * specified by Rs1(4,1) with the 64-bit signed integer of an even/odd pair of registers specified by
  6218. * Rs2(4,1). The 64-bit addition result is first arithmetically right-shifted by 1 bit and then written to an
  6219. * even/odd pair of registers specified by Rd(4,1).
  6220. * Rx(4,1), i.e., value d, determines the even/odd pair group of two registers. Specifically, the register
  6221. * pair includes register 2d and 2d+1.
  6222. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
  6223. * of the pair contains the low 32-bit of the result.
  6224. *
  6225. * **RV64 Description**:\n
  6226. * This instruction adds the 64-bit signed integer in Rs1 with the 64-bit signed
  6227. * integer in Rs2. The 64-bit addition result is first arithmetically right-shifted by 1 bit and then
  6228. * written to Rd.
  6229. *
  6230. * **Operations**:\n
  6231. * ~~~
  6232. * RV32:
  6233. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  6234. * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
  6235. * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
  6236. * R[t_H].R[t_L] = (R[a_H].R[a_L] + R[b_H].R[b_L]) s>> 1;
  6237. * RV64:
  6238. * Rd = (Rs1 + Rs2) s>> 1;
  6239. * ~~~
  6240. *
  6241. * \param [in] a long long type of value stored in a
  6242. * \param [in] b long long type of value stored in b
  6243. * \return value stored in long long type
  6244. */
  6245. __STATIC_FORCEINLINE long long __RV_RADD64(long long a, long long b)
  6246. {
  6247. long long result;
  6248. __ASM volatile("radd64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  6249. return result;
  6250. }
  6251. /* ===== Inline Function End for 3.82. RADD64 ===== */
  6252. /* ===== Inline Function Start for 3.83. RADDW ===== */
  6253. /**
  6254. * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
  6255. * \brief RADDW (32-bit Signed Halving Addition)
  6256. * \details
  6257. * **Type**: DSP
  6258. *
  6259. * **Syntax**:\n
  6260. * ~~~
  6261. * RADDW Rd, Rs1, Rs2
  6262. * ~~~
  6263. *
  6264. * **Purpose**:\n
  6265. * Add 32-bit signed integers and the results are halved to avoid overflow or saturation.
  6266. *
  6267. * **Description**:\n
  6268. * This instruction adds the first 32-bit signed integer in Rs1 with the first 32-bit signed
  6269. * integer in Rs2. The result is first arithmetically right-shifted by 1 bit and then sign-extended and
  6270. * written to Rd.
  6271. *
  6272. * **Examples**:\n
  6273. * ~~~
  6274. * * Rs1 = 0x7FFFFFFF, Rs2 = 0x7FFFFFFF, Rd = 0x7FFFFFFF
  6275. * * Rs1 = 0x80000000, Rs2 = 0x80000000, Rd = 0x80000000
  6276. * * Rs1 = 0x40000000, Rs2 = 0x80000000, Rd = 0xE0000000
  6277. * ~~~
  6278. *
  6279. * **Operations**:\n
  6280. * ~~~
  6281. * RV32:
  6282. * Rd[31:0] = (Rs1[31:0] + Rs2[31:0]) s>> 1;
  6283. * RV64:
  6284. * resw[31:0] = (Rs1[31:0] + Rs2[31:0]) s>> 1;
  6285. * Rd[63:0] = SE(resw[31:0]);
  6286. * ~~~
  6287. *
  6288. * \param [in] a int type of value stored in a
  6289. * \param [in] b int type of value stored in b
  6290. * \return value stored in long type
  6291. */
  6292. __STATIC_FORCEINLINE long __RV_RADDW(int a, int b)
  6293. {
  6294. long result;
  6295. __ASM volatile("raddw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  6296. return result;
  6297. }
  6298. /* ===== Inline Function End for 3.83. RADDW ===== */
  6299. /* ===== Inline Function Start for 3.84. RCRAS16 ===== */
  6300. /**
  6301. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  6302. * \brief RCRAS16 (SIMD 16-bit Signed Halving Cross Addition & Subtraction)
  6303. * \details
  6304. * **Type**: SIMD
  6305. *
  6306. * **Syntax**:\n
  6307. * ~~~
  6308. * RCRAS16 Rd, Rs1, Rs2
  6309. * ~~~
  6310. *
  6311. * **Purpose**:\n
  6312. * Do 16-bit signed integer element addition and 16-bit signed integer element subtraction in
  6313. * a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit chunks. The results
  6314. * are halved to avoid overflow or saturation.
  6315. *
  6316. * **Description**:\n
  6317. * This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in
  6318. * Rs1 with the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs2, and subtracts the 16-bit
  6319. * signed integer element in [31:16] of 32-bit chunks in Rs2 from the 16-bit signed integer element in
  6320. * [15:0] of 32-bit chunks in Rs1. The element results are first arithmetically right-shifted by 1 bit and
  6321. * then written to [31:16] of 32-bit chunks in Rd and [15:0] of 32-bit chunks in Rd.
  6322. *
  6323. * **Examples**:\n
  6324. * ~~~
  6325. * Please see `RADD16` and `RSUB16` instructions.
  6326. * ~~~
  6327. *
  6328. * **Operations**:\n
  6329. * ~~~
  6330. * Rd.W[x][31:16] = (Rs1.W[x][31:16] + Rs2.W[x][15:0]) s>> 1;
  6331. * Rd.W[x][15:0] = (Rs1.W[x][15:0] - Rs2.W[x][31:16]) s>> 1;
  6332. * for RV32, x=0
  6333. * for RV64, x=1...0
  6334. * ~~~
  6335. *
  6336. * \param [in] a unsigned long type of value stored in a
  6337. * \param [in] b unsigned long type of value stored in b
  6338. * \return value stored in unsigned long type
  6339. */
  6340. __STATIC_FORCEINLINE unsigned long __RV_RCRAS16(unsigned long a, unsigned long b)
  6341. {
  6342. unsigned long result;
  6343. __ASM volatile("rcras16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  6344. return result;
  6345. }
  6346. /* ===== Inline Function End for 3.84. RCRAS16 ===== */
  6347. /* ===== Inline Function Start for 3.85. RCRSA16 ===== */
  6348. /**
  6349. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  6350. * \brief RCRSA16 (SIMD 16-bit Signed Halving Cross Subtraction & Addition)
  6351. * \details
  6352. * **Type**: SIMD
  6353. *
  6354. * **Syntax**:\n
  6355. * ~~~
  6356. * RCRSA16 Rd, Rs1, Rs2
  6357. * ~~~
  6358. *
  6359. * **Purpose**:\n
  6360. * Do 16-bit signed integer element subtraction and 16-bit signed integer element addition in
  6361. * a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit chunks. The results
  6362. * are halved to avoid overflow or saturation.
  6363. *
  6364. * **Description**:\n
  6365. * This instruction subtracts the 16-bit signed integer element in [15:0] of 32-bit chunks
  6366. * in Rs2 from the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1, and adds the 16-bit
  6367. * signed element integer in [15:0] of 32-bit chunks in Rs1 with the 16-bit signed integer element in
  6368. * [31:16] of 32-bit chunks in Rs2. The two results are first arithmetically right-shifted by 1 bit and
  6369. * then written to [31:16] of 32-bit chunks in Rd and [15:0] of 32-bit chunks in Rd.
  6370. *
  6371. * **Examples**:\n
  6372. * ~~~
  6373. * Please see `RADD16` and `RSUB16` instructions.
  6374. * ~~~
  6375. *
  6376. * **Operations**:\n
  6377. * ~~~
  6378. * Rd.W[x][31:16] = (Rs1.W[x][31:16] - Rs2.W[x][15:0]) s>> 1;
  6379. * Rd.W[x][15:0] = (Rs1.W[x][15:0] + Rs2.W[x][31:16]) s>> 1;
  6380. * for RV32, x=0
  6381. * for RV64, x=1...0
  6382. * ~~~
  6383. *
  6384. * \param [in] a unsigned long type of value stored in a
  6385. * \param [in] b unsigned long type of value stored in b
  6386. * \return value stored in unsigned long type
  6387. */
  6388. __STATIC_FORCEINLINE unsigned long __RV_RCRSA16(unsigned long a, unsigned long b)
  6389. {
  6390. unsigned long result;
  6391. __ASM volatile("rcrsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  6392. return result;
  6393. }
  6394. /* ===== Inline Function End for 3.85. RCRSA16 ===== */
  6395. /* ===== Inline Function Start for 3.86. RDOV ===== */
  6396. /**
  6397. * \ingroup NMSIS_Core_DSP_Intrinsic_OV_FLAG_SC
  6398. * \brief RDOV (Read OV flag)
  6399. * \details
  6400. * **Type**: DSP
  6401. *
  6402. * **Syntax**:\n
  6403. * ~~~
  6404. * RDOV Rd # pseudo mnemonic
  6405. * ~~~
  6406. *
  6407. * **Purpose**:\n
  6408. * This pseudo instruction is an alias to `CSRR Rd, ucode` instruction which maps to the real
  6409. * instruction of `CSRRS Rd, ucode, x0`.
  6410. *
  6411. *
  6412. * \return value stored in unsigned long type
  6413. */
  6414. __STATIC_FORCEINLINE unsigned long __RV_RDOV(void)
  6415. {
  6416. unsigned long result;
  6417. __ASM volatile("rdov %0" : "=r"(result));
  6418. return result;
  6419. }
  6420. /* ===== Inline Function End for 3.86. RDOV ===== */
  6421. /* ===== Inline Function Start for 3.87. RSTAS16 ===== */
  6422. /**
  6423. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  6424. * \brief RSTAS16 (SIMD 16-bit Signed Halving Straight Addition & Subtraction)
  6425. * \details
  6426. * **Type**: SIMD
  6427. *
  6428. * **Syntax**:\n
  6429. * ~~~
  6430. * RSTAS16 Rd, Rs1, Rs2
  6431. * ~~~
  6432. *
  6433. * **Purpose**:\n
  6434. * Do 16-bit signed integer element addition and 16-bit signed integer element subtraction in
  6435. * a 32-bit chunk simultaneously. Operands are from corresponding positions in 32-bit chunks. The
  6436. * results are halved to avoid overflow or saturation.
  6437. *
  6438. * **Description**:\n
  6439. * This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in
  6440. * Rs1 with the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs2, and subtracts the 16-bit
  6441. * signed integer element in [15:0] of 32-bit chunks in Rs2 from the 16-bit signed integer element in
  6442. * [15:0] of 32-bit chunks in Rs1. The element results are first arithmetically right-shifted by 1 bit and
  6443. * then written to [31:16] of 32-bit chunks in Rd and [15:0] of 32-bit chunks in Rd.
  6444. *
  6445. * **Examples**:\n
  6446. * ~~~
  6447. * Please see `RADD16` and `RSUB16` instructions.
  6448. * ~~~
  6449. *
  6450. * **Operations**:\n
  6451. * ~~~
  6452. * Rd.W[x][31:16] = (Rs1.W[x][31:16] + Rs2.W[x][31:16]) s>> 1;
  6453. * Rd.W[x][15:0] = (Rs1.W[x][15:0] - Rs2.W[x][15:0]) s>> 1;
  6454. * for RV32, x=0
  6455. * for RV64, x=1...0
  6456. * ~~~
  6457. *
  6458. * \param [in] a unsigned long type of value stored in a
  6459. * \param [in] b unsigned long type of value stored in b
  6460. * \return value stored in unsigned long type
  6461. */
  6462. __STATIC_FORCEINLINE unsigned long __RV_RSTAS16(unsigned long a, unsigned long b)
  6463. {
  6464. unsigned long result;
  6465. __ASM volatile("rstas16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  6466. return result;
  6467. }
  6468. /* ===== Inline Function End for 3.87. RSTAS16 ===== */
  6469. /* ===== Inline Function Start for 3.88. RSTSA16 ===== */
  6470. /**
  6471. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  6472. * \brief RSTSA16 (SIMD 16-bit Signed Halving Straight Subtraction & Addition)
  6473. * \details
  6474. * **Type**: SIMD
  6475. *
  6476. * **Syntax**:\n
  6477. * ~~~
  6478. * RSTSA16 Rd, Rs1, Rs2
  6479. * ~~~
  6480. *
  6481. * **Purpose**:\n
  6482. * Do 16-bit signed integer element subtraction and 16-bit signed integer element addition in
  6483. * a 32-bit chunk simultaneously. Operands are from corresponding positions in 32-bit chunks. The
  6484. * results are halved to avoid overflow or saturation.
  6485. *
  6486. * **Description**:\n
  6487. * This instruction subtracts the 16-bit signed integer element in [31:16] of 32-bit chunks
  6488. * in Rs2 from the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1, and adds the 16-bit
  6489. * signed integer element in [15:0] of 32-bit chunks in Rs1 with the 16-bit signed integer element in
  6490. * [15:0] of 32-bit chunks in Rs2. The two results are first arithmetically right-shifted by 1 bit and then
  6491. * written to [31:16] of 32-bit chunks in Rd and [15:0] of 32-bit chunks in Rd.
  6492. *
  6493. * **Examples**:\n
  6494. * ~~~
  6495. * Please see `RADD16` and `RSUB16` instructions.
  6496. * ~~~
  6497. *
  6498. * **Operations**:\n
  6499. * ~~~
  6500. * Rd.W[x][31:16] = (Rs1.W[x][31:16] - Rs2.W[x][31:16]) s>> 1;
  6501. * Rd.W[x][15:0] = (Rs1.W[x][15:0] + Rs2.W[x][15:0]) s>> 1;
  6502. * for RV32, x=0
  6503. * for RV64, x=1...0
  6504. * ~~~
  6505. *
  6506. * \param [in] a unsigned long type of value stored in a
  6507. * \param [in] b unsigned long type of value stored in b
  6508. * \return value stored in unsigned long type
  6509. */
  6510. __STATIC_FORCEINLINE unsigned long __RV_RSTSA16(unsigned long a, unsigned long b)
  6511. {
  6512. unsigned long result;
  6513. __ASM volatile("rstsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  6514. return result;
  6515. }
  6516. /* ===== Inline Function End for 3.88. RSTSA16 ===== */
  6517. /* ===== Inline Function Start for 3.89. RSUB8 ===== */
  6518. /**
  6519. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
  6520. * \brief RSUB8 (SIMD 8-bit Signed Halving Subtraction)
  6521. * \details
  6522. * **Type**: SIMD
  6523. *
  6524. * **Syntax**:\n
  6525. * ~~~
  6526. * RSUB8 Rd, Rs1, Rs2
  6527. * ~~~
  6528. *
  6529. * **Purpose**:\n
  6530. * Do 8-bit signed integer element subtractions simultaneously. The results are halved to
  6531. * avoid overflow or saturation.
  6532. *
  6533. * **Description**:\n
  6534. * This instruction subtracts the 8-bit signed integer elements in Rs2 from the 8-bit
  6535. * signed integer elements in Rs1. The results are first arithmetically right-shifted by 1 bit and then
  6536. * written to Rd.
  6537. *
  6538. * **Examples**:\n
  6539. * ~~~
  6540. * * Rs1 = 0x7F, Rs2 = 0x80, Rd = 0x7F
  6541. * * Rs1 = 0x80, Rs2 = 0x7F, Rd = 0x80
  6542. * * Rs1 = 0x80, Rs2 = 0x40, Rd = 0xA0
  6543. * ~~~
  6544. *
  6545. * **Operations**:\n
  6546. * ~~~
  6547. * Rd.B[x] = (Rs1.B[x] - Rs2.B[x]) s>> 1;
  6548. * for RV32: x=3...0,
  6549. * for RV64: x=7...0
  6550. * ~~~
  6551. *
  6552. * \param [in] a unsigned long type of value stored in a
  6553. * \param [in] b unsigned long type of value stored in b
  6554. * \return value stored in unsigned long type
  6555. */
  6556. __STATIC_FORCEINLINE unsigned long __RV_RSUB8(unsigned long a, unsigned long b)
  6557. {
  6558. unsigned long result;
  6559. __ASM volatile("rsub8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  6560. return result;
  6561. }
  6562. /* ===== Inline Function End for 3.89. RSUB8 ===== */
  6563. /* ===== Inline Function Start for 3.90. RSUB16 ===== */
  6564. /**
  6565. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  6566. * \brief RSUB16 (SIMD 16-bit Signed Halving Subtraction)
  6567. * \details
  6568. * **Type**: SIMD
  6569. *
  6570. * **Syntax**:\n
  6571. * ~~~
  6572. * RSUB16 Rd, Rs1, Rs2
  6573. * ~~~
  6574. *
  6575. * **Purpose**:\n
  6576. * Do 16-bit signed integer element subtractions simultaneously. The results are halved to
  6577. * avoid overflow or saturation.
  6578. *
  6579. * **Description**:\n
  6580. * This instruction subtracts the 16-bit signed integer elements in Rs2 from the 16-bit
  6581. * signed integer elements in Rs1. The results are first arithmetically right-shifted by 1 bit and then
  6582. * written to Rd.
  6583. *
  6584. * **Examples**:\n
  6585. * ~~~
  6586. * * Rs1 = 0x7FFF, Rs2 = 0x8000, Rd = 0x7FFF
  6587. * * Rs1 = 0x8000, Rs2 = 0x7FFF, Rd = 0x8000
  6588. * * Rs1 = 0x8000, Rs2 = 0x4000, Rd = 0xA000
  6589. * ~~~
  6590. *
  6591. * **Operations**:\n
  6592. * ~~~
  6593. * Rd.H[x] = (Rs1.H[x] - Rs2.H[x]) s>> 1;
  6594. * for RV32: x=1...0,
  6595. * for RV64: x=3...0
  6596. * ~~~
  6597. *
  6598. * \param [in] a unsigned long type of value stored in a
  6599. * \param [in] b unsigned long type of value stored in b
  6600. * \return value stored in unsigned long type
  6601. */
  6602. __STATIC_FORCEINLINE unsigned long __RV_RSUB16(unsigned long a, unsigned long b)
  6603. {
  6604. unsigned long result;
  6605. __ASM volatile("rsub16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  6606. return result;
  6607. }
  6608. /* ===== Inline Function End for 3.90. RSUB16 ===== */
  6609. /* ===== Inline Function Start for 3.91. RSUB64 ===== */
  6610. /**
  6611. * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
  6612. * \brief RSUB64 (64-bit Signed Halving Subtraction)
  6613. * \details
  6614. * **Type**: DSP (64-bit Profile)
  6615. *
  6616. * **Syntax**:\n
  6617. * ~~~
  6618. * RSUB64 Rd, Rs1, Rs2
  6619. * ~~~
  6620. *
  6621. * **Purpose**:\n
  6622. * Perform a 64-bit signed integer subtraction. The result is halved to avoid overflow or
  6623. * saturation.
  6624. *
  6625. * **RV32 Description**:\n
  6626. * This instruction subtracts the 64-bit signed integer of an even/odd pair of
  6627. * registers specified by Rs2(4,1) from the 64-bit signed integer of an even/odd pair of registers
  6628. * specified by Rs1(4,1). The subtraction result is first arithmetically right-shifted by 1 bit and then
  6629. * written to an even/odd pair of registers specified by Rd(4,1).
  6630. * Rx(4,1), i.e., value d, determines the even/odd pair group of two registers. Specifically, the register
  6631. * pair includes register 2d and 2d+1.
  6632. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
  6633. * of the pair contains the low 32-bit of the result.
  6634. *
  6635. * **RV64 Description**:\n
  6636. * This instruction subtracts the 64-bit signed integer in Rs2 from the 64-bit signed
  6637. * integer in Rs1. The 64-bit subtraction result is first arithmetically right-shifted by 1 bit and then
  6638. * written to Rd.
  6639. *
  6640. * **Operations**:\n
  6641. * ~~~
  6642. * RV32:
  6643. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  6644. * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
  6645. * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
  6646. * R[t_H].R[t_L] = (R[a_H].R[a_L] - R[b_H].R[b_L]) s>> 1;
  6647. * RV64:
  6648. * Rd = (Rs1 - Rs2) s>> 1;
  6649. * ~~~
  6650. *
  6651. * \param [in] a long long type of value stored in a
  6652. * \param [in] b long long type of value stored in b
  6653. * \return value stored in long long type
  6654. */
  6655. __STATIC_FORCEINLINE long long __RV_RSUB64(long long a, long long b)
  6656. {
  6657. long long result;
  6658. __ASM volatile("rsub64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  6659. return result;
  6660. }
  6661. /* ===== Inline Function End for 3.91. RSUB64 ===== */
  6662. /* ===== Inline Function Start for 3.92. RSUBW ===== */
  6663. /**
  6664. * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
  6665. * \brief RSUBW (32-bit Signed Halving Subtraction)
  6666. * \details
  6667. * **Type**: DSP
  6668. *
  6669. * **Syntax**:\n
  6670. * ~~~
  6671. * RSUBW Rd, Rs1, Rs2
  6672. * ~~~
  6673. *
  6674. * **Purpose**:\n
  6675. * Subtract 32-bit signed integers and the result is halved to avoid overflow or saturation.
  6676. *
  6677. * **Description**:\n
  6678. * This instruction subtracts the first 32-bit signed integer in Rs2 from the first 32-bit
  6679. * signed integer in Rs1. The result is first arithmetically right-shifted by 1 bit and then sign-extended
  6680. * and written to Rd.
  6681. *
  6682. * **Examples**:\n
  6683. * ~~~
  6684. * * Rs1 = 0x7FFFFFFF, Rs2 = 0x80000000, Rd = 0x7FFFFFFF
  6685. * * Rs1 = 0x80000000, Rs2 = 0x7FFFFFFF, Rd = 0x80000000
  6686. * * Rs1 = 0x80000000, Rs2 = 0x40000000, Rd = 0xA0000000
  6687. * ~~~
  6688. *
  6689. * **Operations**:\n
  6690. * ~~~
  6691. * RV32:
  6692. * Rd[31:0] = (Rs1[31:0] - Rs2[31:0]) s>> 1;
  6693. * RV64:
  6694. * resw[31:0] = (Rs1[31:0] - Rs2[31:0]) s>> 1;
  6695. * Rd[63:0] = SE(resw[31:0]);
  6696. * ~~~
  6697. *
  6698. * \param [in] a int type of value stored in a
  6699. * \param [in] b int type of value stored in b
  6700. * \return value stored in long type
  6701. */
  6702. __STATIC_FORCEINLINE long __RV_RSUBW(int a, int b)
  6703. {
  6704. long result;
  6705. __ASM volatile("rsubw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  6706. return result;
  6707. }
  6708. /* ===== Inline Function End for 3.92. RSUBW ===== */
  6709. /* ===== Inline Function Start for 3.93. SCLIP8 ===== */
  6710. /**
  6711. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
  6712. * \brief SCLIP8 (SIMD 8-bit Signed Clip Value)
  6713. * \details
  6714. * **Type**: SIMD
  6715. *
  6716. * **Syntax**:\n
  6717. * ~~~
  6718. * SCLIP8 Rd, Rs1, imm3u[2:0]
  6719. * ~~~
  6720. *
  6721. * **Purpose**:\n
  6722. * Limit the 8-bit signed integer elements of a register into a signed range simultaneously.
  6723. *
  6724. * **Description**:\n
  6725. * This instruction limits the 8-bit signed integer elements stored in Rs1 into a signed
  6726. * integer range between 2^imm3u-1 and -2^imm3u, and writes the limited results to Rd. For example, if
  6727. * imm3u is 3, the 8-bit input values should be saturated between 7 and -8. If saturation is performed,
  6728. * set OV bit to 1.
  6729. *
  6730. * **Operations**:\n
  6731. * ~~~
  6732. * src = Rs1.B[x];
  6733. * if (src > (2^imm3u)-1) {
  6734. * src = (2^imm3u)-1;
  6735. * OV = 1;
  6736. * } else if (src < -2^imm3u) {
  6737. * src = -2^imm3u;
  6738. * OV = 1;
  6739. * }
  6740. * Rd.B[x] = src
  6741. * for RV32: x=3...0,
  6742. * for RV64: x=7...0
  6743. * ~~~
  6744. *
  6745. * \param [in] a unsigned long type of value stored in a
  6746. * \param [in] b unsigned int type of value stored in b
  6747. * \return value stored in unsigned long type
  6748. */
  /*
   * Wraps the sclip8 instruction in a GNU statement expression.
   * The argument `a` is converted to unsigned long and captured first, before
   * the output temporary exists, and both temporaries carry macro-specific
   * names. This keeps a caller expression such as `__RV_SCLIP8(result, 3)`
   * from binding to an uninitialized local declared inside the macro.
   * `b` is handed to the assembler through the "K" immediate constraint, so it
   * has to be a compile-time constant.
   */
  #define __RV_SCLIP8(a, b) \
      ({ \
          unsigned long __rv_sclip8_rs1 = (unsigned long)(a); \
          unsigned long __rv_sclip8_rd; \
          __ASM volatile("sclip8 %[rd], %[rs1], %[imm]" \
                         : [rd] "=r"(__rv_sclip8_rd) \
                         : [rs1] "r"(__rv_sclip8_rs1), [imm] "K"(b)); \
          __rv_sclip8_rd; \
      })
  6756. /* ===== Inline Function End for 3.93. SCLIP8 ===== */
  6757. /* ===== Inline Function Start for 3.94. SCLIP16 ===== */
  6758. /**
  6759. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
  6760. * \brief SCLIP16 (SIMD 16-bit Signed Clip Value)
  6761. * \details
  6762. * **Type**: SIMD
  6763. *
  6764. * **Syntax**:\n
  6765. * ~~~
  6766. * SCLIP16 Rd, Rs1, imm4u[3:0]
  6767. * ~~~
  6768. *
  6769. * **Purpose**:\n
  6770. * Limit the 16-bit signed integer elements of a register into a signed range simultaneously.
  6771. *
  6772. * **Description**:\n
  6773. * This instruction limits the 16-bit signed integer elements stored in Rs1 into a signed
  6774. * integer range between 2^imm4u-1 and -2^imm4u, and writes the limited results to Rd. For example, if
  6775. * imm4u is 3, the 16-bit input values should be saturated between 7 and -8. If saturation is performed,
  6776. * set OV bit to 1.
  6777. *
  6778. * **Operations**:\n
  6779. * ~~~
  6780. * src = Rs1.H[x];
  6781. * if (src > (2^imm4u)-1) {
  6782. * src = (2^imm4u)-1;
  6783. * OV = 1;
  6784. * } else if (src < -2^imm4u) {
  6785. * src = -2^imm4u;
  6786. * OV = 1;
  6787. * }
  6788. * Rd.H[x] = src
  6789. * for RV32: x=1...0,
  6790. * for RV64: x=3...0
  6791. * ~~~
  6792. *
  6793. * \param [in] a unsigned long type of value stored in a
  6794. * \param [in] b unsigned int type of value stored in b
  6795. * \return value stored in unsigned long type
  6796. */
  /*
   * Wraps the sclip16 instruction in a GNU statement expression.
   * The argument `a` is converted to unsigned long and captured first, before
   * the output temporary exists, and both temporaries carry macro-specific
   * names. This keeps a caller expression such as `__RV_SCLIP16(result, 3)`
   * from binding to an uninitialized local declared inside the macro.
   * `b` is handed to the assembler through the "K" immediate constraint, so it
   * has to be a compile-time constant.
   */
  #define __RV_SCLIP16(a, b) \
      ({ \
          unsigned long __rv_sclip16_rs1 = (unsigned long)(a); \
          unsigned long __rv_sclip16_rd; \
          __ASM volatile("sclip16 %[rd], %[rs1], %[imm]" \
                         : [rd] "=r"(__rv_sclip16_rd) \
                         : [rs1] "r"(__rv_sclip16_rs1), [imm] "K"(b)); \
          __rv_sclip16_rd; \
      })
  6804. /* ===== Inline Function End for 3.94. SCLIP16 ===== */
  6805. /* ===== Inline Function Start for 3.95. SCLIP32 ===== */
  6806. /**
  6807. * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
  6808. * \brief SCLIP32 (SIMD 32-bit Signed Clip Value)
  6809. * \details
  6810. * **Type**: DSP
  6811. *
  6812. * **Syntax**:\n
  6813. * ~~~
  6814. * SCLIP32 Rd, Rs1, imm5u[4:0]
  6815. * ~~~
  6816. *
  6817. * **Purpose**:\n
  6818. * Limit the 32-bit signed integer elements of a register into a signed range simultaneously.
  6819. *
  6820. * **Description**:\n
  6821. * This instruction limits the 32-bit signed integer elements stored in Rs1 into a signed
  6822. * integer range between 2^imm5u-1 and -2^imm5u, and writes the limited results to Rd. For example, if
  6823. * imm5u is 3, the 32-bit input values should be saturated between 7 and -8. If saturation is performed,
  6824. * set OV bit to 1.
  6825. *
  6826. * **Operations**:\n
  6827. * ~~~
  6828. * src = Rs1.W[x];
  6829. * if (src > (2^imm5u)-1) {
  6830. * src = (2^imm5u)-1;
  6831. * OV = 1;
  6832. * } else if (src < -2^imm5u) {
  6833. * src = -2^imm5u;
  6834. * OV = 1;
  6835. * }
  6836. * Rd.W[x] = src
  6837. * for RV32: x=0,
  6838. * for RV64: x=1...0
  6839. * ~~~
  6840. *
  6841. * \param [in] a long type of value stored in a
  6842. * \param [in] b unsigned int type of value stored in b
  6843. * \return value stored in long type
  6844. */
  /*
   * Wraps the sclip32 instruction in a GNU statement expression.
   * The argument `a` is converted to long and captured first, before the
   * output temporary exists, and both temporaries carry macro-specific names.
   * This keeps a caller expression such as `__RV_SCLIP32(result, 3)` from
   * binding to an uninitialized local declared inside the macro.
   * `b` is handed to the assembler through the "K" immediate constraint, so it
   * has to be a compile-time constant.
   */
  #define __RV_SCLIP32(a, b) \
      ({ \
          long __rv_sclip32_rs1 = (long)(a); \
          long __rv_sclip32_rd; \
          __ASM volatile("sclip32 %[rd], %[rs1], %[imm]" \
                         : [rd] "=r"(__rv_sclip32_rd) \
                         : [rs1] "r"(__rv_sclip32_rs1), [imm] "K"(b)); \
          __rv_sclip32_rd; \
      })
  6852. /* ===== Inline Function End for 3.95. SCLIP32 ===== */
  6853. /* ===== Inline Function Start for 3.96. SCMPLE8 ===== */
  6854. /**
  6855. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP
  6856. * \brief SCMPLE8 (SIMD 8-bit Signed Compare Less Than & Equal)
  6857. * \details
  6858. * **Type**: SIMD
  6859. *
  6860. * **Syntax**:\n
  6861. * ~~~
  6862. * SCMPLE8 Rd, Rs1, Rs2
  6863. * ~~~
  6864. *
  6865. * **Purpose**:\n
  6866. * Do 8-bit signed integer elements less than & equal comparisons simultaneously.
  6867. *
  6868. * **Description**:\n
  6869. * This instruction compares the 8-bit signed integer elements in Rs1 with the 8-bit
  6870. * signed integer elements in Rs2 to see if the one in Rs1 is less than or equal to the one in Rs2. If it is
  6871. * true, the result is 0xFF; otherwise, the result is 0x0. The element comparison results are written to
  6872. * Rd.
  6873. *
  6874. * **Operations**:\n
  6875. * ~~~
  6876. * Rd.B[x] = (Rs1.B[x] {le} Rs2.B[x])? 0xff : 0x0;
  6877. * for RV32: x=3...0,
  6878. * for RV64: x=7...0
  6879. * ~~~
  6880. *
  6881. * \param [in] a unsigned long type of value stored in a
  6882. * \param [in] b unsigned long type of value stored in b
  6883. * \return value stored in unsigned long type
  6884. */
  6885. __STATIC_FORCEINLINE unsigned long __RV_SCMPLE8(unsigned long a, unsigned long b)
  6886. {
  6887. unsigned long result;
  6888. __ASM volatile("scmple8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  6889. return result;
  6890. }
  6891. /* ===== Inline Function End for 3.96. SCMPLE8 ===== */
  6892. /* ===== Inline Function Start for 3.97. SCMPLE16 ===== */
  6893. /**
  6894. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP
  6895. * \brief SCMPLE16 (SIMD 16-bit Signed Compare Less Than & Equal)
  6896. * \details
  6897. * **Type**: SIMD
  6898. *
  6899. * **Syntax**:\n
  6900. * ~~~
  6901. * SCMPLE16 Rd, Rs1, Rs2
  6902. * ~~~
  6903. *
  6904. * **Purpose**:\n
  6905. * Do 16-bit signed integer elements less than & equal comparisons simultaneously.
  6906. *
  6907. * **Description**:\n
  6908. * This instruction compares the 16-bit signed integer elements in Rs1 with the 16-bit
  6909. * signed integer elements in Rs2 to see if the one in Rs1 is less than or equal to the one in Rs2. If it is
  6910. * true, the result is 0xFFFF; otherwise, the result is 0x0. The element comparison results are written
  6911. * to Rd.
  6912. *
  6913. * **Operations**:\n
  6914. * ~~~
  6915. * Rd.H[x] = (Rs1.H[x] {le} Rs2.H[x])? 0xffff : 0x0;
  6916. * for RV32: x=1...0,
  6917. * for RV64: x=3...0
  6918. * ~~~
  6919. *
  6920. * \param [in] a unsigned long type of value stored in a
  6921. * \param [in] b unsigned long type of value stored in b
  6922. * \return value stored in unsigned long type
  6923. */
  6924. __STATIC_FORCEINLINE unsigned long __RV_SCMPLE16(unsigned long a, unsigned long b)
  6925. {
  6926. unsigned long result;
  6927. __ASM volatile("scmple16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  6928. return result;
  6929. }
  6930. /* ===== Inline Function End for 3.97. SCMPLE16 ===== */
  6931. /* ===== Inline Function Start for 3.98. SCMPLT8 ===== */
  6932. /**
  6933. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP
  6934. * \brief SCMPLT8 (SIMD 8-bit Signed Compare Less Than)
  6935. * \details
  6936. * **Type**: SIMD
  6937. *
  6938. * **Syntax**:\n
  6939. * ~~~
  6940. * SCMPLT8 Rd, Rs1, Rs2
  6941. * ~~~
  6942. *
  6943. * **Purpose**:\n
  6944. * Do 8-bit signed integer elements less than comparisons simultaneously.
  6945. *
  6946. * **Description**:\n
  6947. * This instruction compares the 8-bit signed integer elements in Rs1 with the 8-bit
  6948. * signed integer elements in Rs2 to see if the one in Rs1 is less than the one in Rs2. If it is true, the
  6949. * result is 0xFF; otherwise, the result is 0x0. The element comparison results are written to Rd.
  6950. *
  6951. * **Operations**:\n
  6952. * ~~~
  6953. * Rd.B[x] = (Rs1.B[x] < Rs2.B[x])? 0xff : 0x0;
  6954. * for RV32: x=3...0,
  6955. * for RV64: x=7...0
  6956. * ~~~
  6957. *
  6958. * \param [in] a unsigned long type of value stored in a
  6959. * \param [in] b unsigned long type of value stored in b
  6960. * \return value stored in unsigned long type
  6961. */
  6962. __STATIC_FORCEINLINE unsigned long __RV_SCMPLT8(unsigned long a, unsigned long b)
  6963. {
  6964. unsigned long result;
  6965. __ASM volatile("scmplt8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  6966. return result;
  6967. }
  6968. /* ===== Inline Function End for 3.98. SCMPLT8 ===== */
  6969. /* ===== Inline Function Start for 3.99. SCMPLT16 ===== */
  6970. /**
  6971. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP
  6972. * \brief SCMPLT16 (SIMD 16-bit Signed Compare Less Than)
  6973. * \details
  6974. * **Type**: SIMD
  6975. *
  6976. * **Syntax**:\n
  6977. * ~~~
  6978. * SCMPLT16 Rd, Rs1, Rs2
  6979. * ~~~
  6980. *
  6981. * **Purpose**:\n
  6982. * Do 16-bit signed integer elements less than comparisons simultaneously.
  6983. *
  6984. * **Description**:\n
  6985. * This instruction compares the 16-bit signed integer elements in Rs1 with the 16-bit
  6986. * signed integer elements in Rs2 to see if the one in Rs1 is less than the one in Rs2. If it is true, the
  6987. * result is 0xFFFF; otherwise, the result is 0x0. The element comparison results are written to Rd.
  6988. *
  6989. * **Operations**:\n
  6990. * ~~~
  6991. * Rd.H[x] = (Rs1.H[x] < Rs2.H[x])? 0xffff : 0x0;
  6992. * for RV32: x=1...0,
  6993. * for RV64: x=3...0
  6994. * ~~~
  6995. *
  6996. * \param [in] a unsigned long type of value stored in a
  6997. * \param [in] b unsigned long type of value stored in b
  6998. * \return value stored in unsigned long type
  6999. */
  7000. __STATIC_FORCEINLINE unsigned long __RV_SCMPLT16(unsigned long a, unsigned long b)
  7001. {
  7002. unsigned long result;
  7003. __ASM volatile("scmplt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  7004. return result;
  7005. }
  7006. /* ===== Inline Function End for 3.99. SCMPLT16 ===== */
  7007. /* ===== Inline Function Start for 3.100. SLL8 ===== */
  7008. /**
  7009. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
  7010. * \brief SLL8 (SIMD 8-bit Shift Left Logical)
  7011. * \details
  7012. * **Type**: SIMD
  7013. *
  7014. * **Syntax**:\n
  7015. * ~~~
  7016. * SLL8 Rd, Rs1, Rs2
  7017. * ~~~
  7018. *
  7019. * **Purpose**:\n
  7020. * Do 8-bit elements logical left shift operations simultaneously. The shift amount is a
  7021. * variable from a GPR.
  7022. *
  7023. * **Description**:\n
  7024. * The 8-bit elements in Rs1 are left-shifted logically. And the results are written to Rd.
  7025. * The shifted out bits are filled with zero and the shift amount is specified by the low-order 3-bits of
  7026. * the value in the Rs2 register.
  7027. *
  7028. * **Operations**:\n
  7029. * ~~~
  7030. * sa = Rs2[2:0];
  7031. * Rd.B[x] = Rs1.B[x] << sa;
  7032. * for RV32: x=3...0,
  7033. * for RV64: x=7...0
  7034. * ~~~
  7035. *
  7036. * \param [in] a unsigned long type of value stored in a
  7037. * \param [in] b unsigned int type of value stored in b
  7038. * \return value stored in unsigned long type
  7039. */
  7040. __STATIC_FORCEINLINE unsigned long __RV_SLL8(unsigned long a, unsigned int b)
  7041. {
  7042. unsigned long result;
  7043. __ASM volatile("sll8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  7044. return result;
  7045. }
  7046. /* ===== Inline Function End for 3.100. SLL8 ===== */
  7047. /* ===== Inline Function Start for 3.101. SLLI8 ===== */
  7048. /**
  7049. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
  7050. * \brief SLLI8 (SIMD 8-bit Shift Left Logical Immediate)
  7051. * \details
  7052. * **Type**: SIMD
  7053. *
  7054. * **Syntax**:\n
  7055. * ~~~
  7056. * SLLI8 Rd, Rs1, imm3u
  7057. * ~~~
  7058. *
  7059. * **Purpose**:\n
  7060. * Do 8-bit elements logical left shift operations simultaneously. The shift amount is an
  7061. * immediate value.
  7062. *
  7063. * **Description**:\n
  7064. * The 8-bit elements in Rs1 are left-shifted logically. And the results are written to Rd.
  7065. * The shifted out bits are filled with zero and the shift amount is specified by the imm3u constant.
  7066. *
  7067. * **Operations**:\n
  7068. * ~~~
  7069. * sa = imm3u[2:0];
  7070. * Rd.B[x] = Rs1.B[x] << sa;
  7071. * for RV32: x=3...0,
  7072. * for RV64: x=7...0
  7073. * ~~~
  7074. *
  7075. * \param [in] a unsigned long type of value stored in a
  7076. * \param [in] b unsigned int type of value stored in b
  7077. * \return value stored in unsigned long type
  7078. */
  /*
   * Wraps the slli8 instruction in a GNU statement expression.
   * The argument `a` is converted to unsigned long and captured first, before
   * the output temporary exists, and both temporaries carry macro-specific
   * names. This keeps a caller expression such as `__RV_SLLI8(result, 3)`
   * from binding to an uninitialized local declared inside the macro.
   * `b` is handed to the assembler through the "K" immediate constraint, so it
   * has to be a compile-time constant.
   */
  #define __RV_SLLI8(a, b) \
      ({ \
          unsigned long __rv_slli8_rs1 = (unsigned long)(a); \
          unsigned long __rv_slli8_rd; \
          __ASM volatile("slli8 %[rd], %[rs1], %[imm]" \
                         : [rd] "=r"(__rv_slli8_rd) \
                         : [rs1] "r"(__rv_slli8_rs1), [imm] "K"(b)); \
          __rv_slli8_rd; \
      })
  7086. /* ===== Inline Function End for 3.101. SLLI8 ===== */
  7087. /* ===== Inline Function Start for 3.102. SLL16 ===== */
  7088. /**
  7089. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
  7090. * \brief SLL16 (SIMD 16-bit Shift Left Logical)
  7091. * \details
  7092. * **Type**: SIMD
  7093. *
  7094. * **Syntax**:\n
  7095. * ~~~
  7096. * SLL16 Rd, Rs1, Rs2
  7097. * ~~~
  7098. *
  7099. * **Purpose**:\n
  7100. * Do 16-bit elements logical left shift operations simultaneously. The shift amount is a
  7101. * variable from a GPR.
  7102. *
  7103. * **Description**:\n
  7104. * The 16-bit elements in Rs1 are left-shifted logically. And the results are written to Rd.
  7105. * The shifted out bits are filled with zero and the shift amount is specified by the low-order 4-bits of
  7106. * the value in the Rs2 register.
  7107. *
  7108. * **Operations**:\n
  7109. * ~~~
  7110. * sa = Rs2[3:0];
  7111. * Rd.H[x] = Rs1.H[x] << sa;
  7112. * for RV32: x=1...0,
  7113. * for RV64: x=3...0
  7114. * ~~~
  7115. *
  7116. * \param [in] a unsigned long type of value stored in a
  7117. * \param [in] b unsigned int type of value stored in b
  7118. * \return value stored in unsigned long type
  7119. */
  7120. __STATIC_FORCEINLINE unsigned long __RV_SLL16(unsigned long a, unsigned int b)
  7121. {
  7122. unsigned long result;
  7123. __ASM volatile("sll16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  7124. return result;
  7125. }
  7126. /* ===== Inline Function End for 3.102. SLL16 ===== */
  7127. /* ===== Inline Function Start for 3.103. SLLI16 ===== */
  7128. /**
  7129. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
  7130. * \brief SLLI16 (SIMD 16-bit Shift Left Logical Immediate)
  7131. * \details
  7132. * **Type**: SIMD
  7133. *
  7134. * **Syntax**:\n
  7135. * ~~~
  7136. * SLLI16 Rd, Rs1, imm4[3:0]
  7137. * ~~~
  7138. *
  7139. * **Purpose**:\n
  7140. * Do 16-bit element logical left shift operations simultaneously. The shift amount is an
  7141. * immediate value.
  7142. *
  7143. * **Description**:\n
  7144. * The 16-bit elements in Rs1 are left-shifted logically. The shifted out bits are filled with
  7145. * zero and the shift amount is specified by the imm4[3:0] constant. And the results are written to Rd.
  7146. *
  7147. * **Operations**:\n
  7148. * ~~~
  7149. * sa = imm4[3:0];
  7150. * Rd.H[x] = Rs1.H[x] << sa;
  7151. * for RV32: x=1...0,
  7152. * for RV64: x=3...0
  7153. * ~~~
  7154. *
  7155. * \param [in] a unsigned long type of value stored in a
  7156. * \param [in] b unsigned int type of value stored in b
  7157. * \return value stored in unsigned long type
  7158. */
  /*
   * Wraps the slli16 instruction in a GNU statement expression.
   * The argument `a` is converted to unsigned long and captured first, before
   * the output temporary exists, and both temporaries carry macro-specific
   * names. This keeps a caller expression such as `__RV_SLLI16(result, 3)`
   * from binding to an uninitialized local declared inside the macro.
   * `b` is handed to the assembler through the "K" immediate constraint, so it
   * has to be a compile-time constant.
   */
  #define __RV_SLLI16(a, b) \
      ({ \
          unsigned long __rv_slli16_rs1 = (unsigned long)(a); \
          unsigned long __rv_slli16_rd; \
          __ASM volatile("slli16 %[rd], %[rs1], %[imm]" \
                         : [rd] "=r"(__rv_slli16_rd) \
                         : [rs1] "r"(__rv_slli16_rs1), [imm] "K"(b)); \
          __rv_slli16_rd; \
      })
  7166. /* ===== Inline Function End for 3.103. SLLI16 ===== */
  7167. /* ===== Inline Function Start for 3.104. SMAL ===== */
  7168. /**
  7169. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
  7170. * \brief SMAL (Signed Multiply Halfs & Add 64-bit)
  7171. * \details
  7172. * **Type**: Partial-SIMD
  7173. *
  7174. * **Syntax**:\n
  7175. * ~~~
  7176. * SMAL Rd, Rs1, Rs2
  7177. * ~~~
  7178. *
  7179. * **Purpose**:\n
  7180. * Multiply the signed bottom 16-bit content of the 32-bit elements of a register with the top
  7181. * 16-bit content of the same 32-bit elements of the same register, and add the results with a 64-bit
  7182. * value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is written back
  7183. * to another even/odd pair of registers (RV32) or a register (RV64).
  7184. *
  7185. * **RV32 Description**:\n
  7186. * This instruction multiplies the bottom 16-bit content of the lower 32-bit of Rs2 with the top 16-bit
  7187. * content of the lower 32-bit of Rs2 and adds the result with the 64-bit value of an even/odd pair of
  7188. * registers specified by Rs1(4,1). The 64-bit addition result is written back to an even/odd pair of
  7189. * registers specified by Rd(4,1). The 16-bit values of Rs2, and the 64-bit value of the Rs1(4,1) register-
  7190. * pair are treated as signed integers.
  7191. * Rx(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
  7192. * includes register 2d and 2d+1.
  7193. * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
  7194. * register of the pair contains the low 32-bit of the operand.
  7195. *
  7196. * **RV64 Description**:\n
  7197. * This instruction multiplies the bottom 16-bit content of the 32-bit elements of Rs2 with the top 16-bit
  7198. * content of the same 32-bit elements of Rs2 and adds the results with the 64-bit value of Rs1. The 64-
  7199. * bit addition result is written back to Rd. The 16-bit values of Rs2, and the 64-bit value of Rs1 are
  7200. * treated as signed integers.
  7201. *
  7202. * **Operations**:\n
  7203. * ~~~
  7204. * RV32:
  7205. * Mres[31:0] = Rs2.H[1] * Rs2.H[0];
  7206. * Idx0 = CONCAT(Rs1(4,1),1'b0); Idx1 = CONCAT(Rs1(4,1),1'b1);
  7207. * Idx2 = CONCAT(Rd(4,1),1'b0); Idx3 = CONCAT(Rd(4,1),1'b1);
  7208. * R[Idx3].R[Idx2] = R[Idx1].R[Idx0] + SE64(Mres[31:0]);
  7209. * RV64:
  7210. * Mres[0][31:0] = Rs2.W[0].H[1] * Rs2.W[0].H[0];
  7211. * Mres[1][31:0] = Rs2.W[1].H[1] * Rs2.W[1].H[0];
  7212. * Rd = Rs1 + SE64(Mres[1][31:0]) + SE64(Mres[0][31:0]);
  7213. * ~~~
  7214. *
  7215. * \param [in] a long long type of value stored in a
  7216. * \param [in] b unsigned long type of value stored in b
  7217. * \return value stored in long long type
  7218. */
  7219. __STATIC_FORCEINLINE long long __RV_SMAL(long long a, unsigned long b)
  7220. {
  7221. long long result;
  7222. __ASM volatile("smal %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  7223. return result;
  7224. }
  7225. /* ===== Inline Function End for 3.104. SMAL ===== */
  7226. /* ===== Inline Function Start for 3.105.1. SMALBB ===== */
  7227. /**
  7228. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
  7229. * \brief SMALBB (Signed Multiply Bottom Halfs & Add 64-bit)
  7230. * \details
  7231. * **Type**: DSP (64-bit Profile)
  7232. *
  7233. * **Syntax**:\n
  7234. * ~~~
  7235. * SMALBB Rd, Rs1, Rs2
  7236. * SMALBT Rd, Rs1, Rs2
  7237. * SMALTT Rd, Rs1, Rs2
  7238. * ~~~
  7239. *
  7240. * **Purpose**:\n
  7241. * Multiply the signed 16-bit content of the 32-bit elements of a register with the 16-bit
  7242. * content of the corresponding 32-bit elements of another register and add the results with a 64-bit
  7243. * value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is written back
  7244. * to the register-pair (RV32) or the register (RV64).
  7245. * * SMALBB: rt pair + bottom*bottom (all 32-bit elements)
  7246. * * SMALBT rt pair + bottom*top (all 32-bit elements)
  7247. * * SMALTT rt pair + top*top (all 32-bit elements)
  7248. *
  7249. * **RV32 Description**:\n
  7250. * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
  7251. * content of Rs2.
  7252. * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit
  7253. * content of Rs2.
  7254. * For the `SMALTT` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content
  7255. * of Rs2.
  7256. * The multiplication result is added with the 64-bit value of an even/odd pair of registers specified by
  7257. * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and
  7258. * Rs2, and the 64-bit value of the register-pair are treated as signed integers.
  7259. * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
  7260. * includes register 2d and 2d+1.
  7261. * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
  7262. * register of the pair contains the low 32-bit of the operand.
  7263. *
  7264. * **RV64 Description**:\n
  7265. * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  7266. * with the bottom 16-bit content of the 32-bit elements of Rs2.
  7267. * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  7268. * with the top 16-bit content of the 32-bit elements of Rs2.
  7269. * For the `SMALTT` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
  7270. * the top 16-bit content of the 32-bit elements of Rs2.
  7271. * The multiplication results are added with the 64-bit value of Rd. The 64-bit addition result is written
  7272. * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed
  7273. * integers.
  7274. *
  7275. * **Operations**:\n
  7276. * ~~~
  7277. * RV32:
  7278. * Mres[31:0] = Rs1.H[0] * Rs2.H[0]; // SMALBB
  7279. * Mres[31:0] = Rs1.H[0] * Rs2.H[1]; // SMALBT
  7280. * Mres[31:0] = Rs1.H[1] * Rs2.H[1]; // SMALTT
  7281. * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
  7282. * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]);
  7283. * RV64:
  7284. * // SMALBB
  7285. * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[0];
  7286. * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[0];
  7287. * // SMALBT
  7288. * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[1];
  7289. * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[1];
  7290. * // SMALTT
  7291. * Mres[0][31:0] = Rs1.W[0].H[1] * Rs2.W[0].H[1];
  7292. * Mres[1][31:0] = Rs1.W[1].H[1] * Rs2.W[1].H[1];
  7293. * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
  7294. * ~~~
  7295. *
  7296. * \param [in] t long long type of value stored in t
  7297. * \param [in] a unsigned long type of value stored in a
  7298. * \param [in] b unsigned long type of value stored in b
  7299. * \return value stored in long long type
  7300. */
  7301. __STATIC_FORCEINLINE long long __RV_SMALBB(long long t, unsigned long a, unsigned long b)
  7302. {
  7303. __ASM volatile("smalbb %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  7304. return t;
  7305. }
  7306. /* ===== Inline Function End for 3.105.1. SMALBB ===== */
  7307. /* ===== Inline Function Start for 3.105.2. SMALBT ===== */
  7308. /**
  7309. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
  7310. * \brief SMALBT (Signed Multiply Bottom Half & Top Half & Add 64-bit)
  7311. * \details
  7312. * **Type**: DSP (64-bit Profile)
  7313. *
  7314. * **Syntax**:\n
  7315. * ~~~
  7316. * SMALBB Rd, Rs1, Rs2
  7317. * SMALBT Rd, Rs1, Rs2
  7318. * SMALTT Rd, Rs1, Rs2
  7319. * ~~~
  7320. *
  7321. * **Purpose**:\n
  7322. * Multiply the signed 16-bit content of the 32-bit elements of a register with the 16-bit
  7323. * content of the corresponding 32-bit elements of another register and add the results with a 64-bit
  7324. * value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is written back
  7325. * to the register-pair (RV32) or the register (RV64).
  7326. * * SMALBB: rt pair + bottom*bottom (all 32-bit elements)
   * * SMALBT: rt pair + bottom*top (all 32-bit elements)
   * * SMALTT: rt pair + top*top (all 32-bit elements)
  7329. *
  7330. * **RV32 Description**:\n
  7331. * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
  7332. * content of Rs2.
  7333. * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit
  7334. * content of Rs2.
  7335. * For the `SMALTT` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content
  7336. * of Rs2.
  7337. * The multiplication result is added with the 64-bit value of an even/odd pair of registers specified by
  7338. * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and
  7339. * Rs2, and the 64-bit value of the register-pair are treated as signed integers.
  7340. * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
  7341. * includes register 2d and 2d+1.
  7342. * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
  7343. * register of the pair contains the low 32-bit of the operand.
  7344. *
  7345. * **RV64 Description**:\n
  7346. * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  7347. * with the bottom 16-bit content of the 32-bit elements of Rs2.
  7348. * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  7349. * with the top 16-bit content of the 32-bit elements of Rs2.
  7350. * For the `SMALTT` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
  7351. * the top 16-bit content of the 32-bit elements of Rs2.
  7352. * The multiplication results are added with the 64-bit value of Rd. The 64-bit addition result is written
  7353. * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed
  7354. * integers.
  7355. *
  7356. * **Operations**:\n
  7357. * ~~~
  7358. * RV32:
  7359. * Mres[31:0] = Rs1.H[0] * Rs2.H[0]; // SMALBB
  7360. * Mres[31:0] = Rs1.H[0] * Rs2.H[1]; // SMALBT
  7361. * Mres[31:0] = Rs1.H[1] * Rs2.H[1]; // SMALTT
  7362. * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
  7363. * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]);
  7364. * RV64:
  7365. * // SMALBB
  7366. * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[0];
  7367. * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[0];
  7368. * // SMALBT
  7369. * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[1];
  7370. * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[1];
  7371. * // SMALTT
  7372. * Mres[0][31:0] = Rs1.W[0].H[1] * Rs2.W[0].H[1];
  7373. * Mres[1][31:0] = Rs1.W[1].H[1] * Rs2.W[1].H[1];
  7374. * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
  7375. * ~~~
  7376. *
  7377. * \param [in] t long long type of value stored in t
  7378. * \param [in] a unsigned long type of value stored in a
  7379. * \param [in] b unsigned long type of value stored in b
  7380. * \return value stored in long long type
  7381. */
  7382. __STATIC_FORCEINLINE long long __RV_SMALBT(long long t, unsigned long a, unsigned long b)
  7383. {
  7384. __ASM volatile("smalbt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  7385. return t;
  7386. }
  7387. /* ===== Inline Function End for 3.105.2. SMALBT ===== */
  7388. /* ===== Inline Function Start for 3.105.3. SMALTT ===== */
  7389. /**
  7390. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
  7391. * \brief SMALTT (Signed Multiply Top Halfs & Add 64-bit)
  7392. * \details
  7393. * **Type**: DSP (64-bit Profile)
  7394. *
  7395. * **Syntax**:\n
  7396. * ~~~
  7397. * SMALBB Rd, Rs1, Rs2
  7398. * SMALBT Rd, Rs1, Rs2
  7399. * SMALTT Rd, Rs1, Rs2
  7400. * ~~~
  7401. *
  7402. * **Purpose**:\n
  7403. * Multiply the signed 16-bit content of the 32-bit elements of a register with the 16-bit
  7404. * content of the corresponding 32-bit elements of another register and add the results with a 64-bit
  7405. * value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is written back
  7406. * to the register-pair (RV32) or the register (RV64).
  7407. * * SMALBB: rt pair + bottom*bottom (all 32-bit elements)
   * * SMALBT: rt pair + bottom*top (all 32-bit elements)
   * * SMALTT: rt pair + top*top (all 32-bit elements)
  7410. *
  7411. * **RV32 Description**:\n
  7412. * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
  7413. * content of Rs2.
  7414. * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit
  7415. * content of Rs2.
  7416. * For the `SMALTT` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content
  7417. * of Rs2.
  7418. * The multiplication result is added with the 64-bit value of an even/odd pair of registers specified by
  7419. * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and
  7420. * Rs2, and the 64-bit value of the register-pair are treated as signed integers.
  7421. * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
  7422. * includes register 2d and 2d+1.
  7423. * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
  7424. * register of the pair contains the low 32-bit of the operand.
  7425. *
  7426. * **RV64 Description**:\n
  7427. * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  7428. * with the bottom 16-bit content of the 32-bit elements of Rs2.
  7429. * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  7430. * with the top 16-bit content of the 32-bit elements of Rs2.
  7431. * For the `SMALTT` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
  7432. * the top 16-bit content of the 32-bit elements of Rs2.
  7433. * The multiplication results are added with the 64-bit value of Rd. The 64-bit addition result is written
  7434. * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed
  7435. * integers.
  7436. *
  7437. * **Operations**:\n
  7438. * ~~~
  7439. * RV32:
  7440. * Mres[31:0] = Rs1.H[0] * Rs2.H[0]; // SMALBB
  7441. * Mres[31:0] = Rs1.H[0] * Rs2.H[1]; // SMALBT
  7442. * Mres[31:0] = Rs1.H[1] * Rs2.H[1]; // SMALTT
  7443. * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
  7444. * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]);
  7445. * RV64:
  7446. * // SMALBB
  7447. * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[0];
  7448. * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[0];
  7449. * // SMALBT
  7450. * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[1];
  7451. * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[1];
  7452. * // SMALTT
  7453. * Mres[0][31:0] = Rs1.W[0].H[1] * Rs2.W[0].H[1];
  7454. * Mres[1][31:0] = Rs1.W[1].H[1] * Rs2.W[1].H[1];
  7455. * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
  7456. * ~~~
  7457. *
  7458. * \param [in] t long long type of value stored in t
  7459. * \param [in] a unsigned long type of value stored in a
  7460. * \param [in] b unsigned long type of value stored in b
  7461. * \return value stored in long long type
  7462. */
  7463. __STATIC_FORCEINLINE long long __RV_SMALTT(long long t, unsigned long a, unsigned long b)
  7464. {
  7465. __ASM volatile("smaltt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  7466. return t;
  7467. }
  7468. /* ===== Inline Function End for 3.105.3. SMALTT ===== */
  7469. /* ===== Inline Function Start for 3.106.1. SMALDA ===== */
  7470. /**
  7471. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
  7472. * \brief SMALDA (Signed Multiply Two Halfs and Two Adds 64-bit)
  7473. * \details
  7474. * **Type**: DSP (64-bit Profile)
  7475. *
  7476. * **Syntax**:\n
  7477. * ~~~
  7478. * SMALDA Rd, Rs1, Rs2
  7479. * SMALXDA Rd, Rs1, Rs2
  7480. * ~~~
  7481. *
  7482. * **Purpose**:\n
  7483. * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
  7484. * adds the two 32-bit results and the 64-bit value of an even/odd pair of registers together.
  7485. * * SMALDA: rt pair+ top*top + bottom*bottom (all 32-bit elements)
  7486. * * SMALXDA: rt pair+ top*bottom + bottom*top (all 32-bit elements)
  7487. *
  7488. * **RV32 Description**:\n
  7489. * For the `SMALDA` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
  7490. * content of Rs2 and then adds the result to the result of multiplying the top 16-bit content of Rs1 with
  7491. * the top 16-bit content of Rs2 with unlimited precision.
  7492. * For the `SMALXDA` instruction, it multiplies the top 16-bit content of Rs1 with the bottom 16-bit
  7493. * content of Rs2 and then adds the result to the result of multiplying the bottom 16-bit content of Rs1
  7494. * with the top 16-bit content of Rs2 with unlimited precision.
   * The result is added to the 64-bit value of an even/odd pair of registers specified by Rd(4,1). The
   * 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and Rs2, and the
   * 64-bit value of the register-pair are treated as signed integers.
  7498. * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
  7499. * includes register 2d and 2d+1.
  7500. * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
  7501. * register of the pair contains the low 32-bit of the operand.
  7502. *
  7503. * **RV64 Description**:\n
  7504. * For the `SMALDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  7505. * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
   * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the
   * 32-bit elements of Rs2 with unlimited precision.
  7508. * For the `SMALXDA` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1
  7509. * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
  7510. * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the
  7511. * 32-bit elements of Rs2 with unlimited precision.
  7512. * The results are added to the 64-bit value of Rd. The 64-bit addition result is written back to Rd. The
  7513. * 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed integers.
  7514. *
  7515. * **Operations**:\n
  7516. * ~~~
  7517. * RV32:
  7518. * // SMALDA
  7519. * Mres0[31:0] = (Rs1.H[0] * Rs2.H[0]);
  7520. * Mres1[31:0] = (Rs1.H[1] * Rs2.H[1]);
  7521. * // SMALXDA
  7522. * Mres0[31:0] = (Rs1.H[0] * Rs2.H[1]);
  7523. * Mres1[31:0] = (Rs1.H[1] * Rs2.H[0]);
  7524. * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
  7525. * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres0[31:0]) + SE64(Mres1[31:0]);
  7526. * RV64:
  7527. * // SMALDA
  7528. * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
  7529. * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
  7530. * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
  7531. * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
  7532. * // SMALXDA
  7533. * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
  7534. * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]);
  7535. * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
  7536. * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]);
  7537. * Rd = Rd + SE64(Mres0[0][31:0]) + SE64(Mres1[0][31:0]) + SE64(Mres0[1][31:0]) +
  7538. * SE64(Mres1[1][31:0]);
  7539. * ~~~
  7540. *
  7541. * \param [in] t long long type of value stored in t
  7542. * \param [in] a unsigned long type of value stored in a
  7543. * \param [in] b unsigned long type of value stored in b
  7544. * \return value stored in long long type
  7545. */
  7546. __STATIC_FORCEINLINE long long __RV_SMALDA(long long t, unsigned long a, unsigned long b)
  7547. {
  7548. __ASM volatile("smalda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  7549. return t;
  7550. }
  7551. /* ===== Inline Function End for 3.106.1. SMALDA ===== */
  7552. /* ===== Inline Function Start for 3.106.2. SMALXDA ===== */
  7553. /**
  7554. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
  7555. * \brief SMALXDA (Signed Crossed Multiply Two Halfs and Two Adds 64-bit)
  7556. * \details
  7557. * **Type**: DSP (64-bit Profile)
  7558. *
  7559. * **Syntax**:\n
  7560. * ~~~
  7561. * SMALDA Rd, Rs1, Rs2
  7562. * SMALXDA Rd, Rs1, Rs2
  7563. * ~~~
  7564. *
  7565. * **Purpose**:\n
  7566. * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
  7567. * adds the two 32-bit results and the 64-bit value of an even/odd pair of registers together.
  7568. * * SMALDA: rt pair+ top*top + bottom*bottom (all 32-bit elements)
  7569. * * SMALXDA: rt pair+ top*bottom + bottom*top (all 32-bit elements)
  7570. *
  7571. * **RV32 Description**:\n
  7572. * For the `SMALDA` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
  7573. * content of Rs2 and then adds the result to the result of multiplying the top 16-bit content of Rs1 with
  7574. * the top 16-bit content of Rs2 with unlimited precision.
  7575. * For the `SMALXDA` instruction, it multiplies the top 16-bit content of Rs1 with the bottom 16-bit
  7576. * content of Rs2 and then adds the result to the result of multiplying the bottom 16-bit content of Rs1
  7577. * with the top 16-bit content of Rs2 with unlimited precision.
   * The result is added to the 64-bit value of an even/odd pair of registers specified by Rd(4,1). The
   * 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and Rs2, and the
   * 64-bit value of the register-pair are treated as signed integers.
  7581. * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
  7582. * includes register 2d and 2d+1.
  7583. * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
  7584. * register of the pair contains the low 32-bit of the operand.
  7585. *
  7586. * **RV64 Description**:\n
  7587. * For the `SMALDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  7588. * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
   * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the
   * 32-bit elements of Rs2 with unlimited precision.
  7591. * For the `SMALXDA` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1
  7592. * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
  7593. * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the
  7594. * 32-bit elements of Rs2 with unlimited precision.
  7595. * The results are added to the 64-bit value of Rd. The 64-bit addition result is written back to Rd. The
  7596. * 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed integers.
  7597. *
  7598. * **Operations**:\n
  7599. * ~~~
  7600. * RV32:
  7601. * // SMALDA
  7602. * Mres0[31:0] = (Rs1.H[0] * Rs2.H[0]);
  7603. * Mres1[31:0] = (Rs1.H[1] * Rs2.H[1]);
  7604. * // SMALXDA
  7605. * Mres0[31:0] = (Rs1.H[0] * Rs2.H[1]);
  7606. * Mres1[31:0] = (Rs1.H[1] * Rs2.H[0]);
  7607. * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
  7608. * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres0[31:0]) + SE64(Mres1[31:0]);
  7609. * RV64:
  7610. * // SMALDA
  7611. * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
  7612. * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
  7613. * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
  7614. * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
  7615. * // SMALXDA
  7616. * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
  7617. * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]);
  7618. * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
  7619. * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]);
  7620. * Rd = Rd + SE64(Mres0[0][31:0]) + SE64(Mres1[0][31:0]) + SE64(Mres0[1][31:0]) +
  7621. * SE64(Mres1[1][31:0]);
  7622. * ~~~
  7623. *
  7624. * \param [in] t long long type of value stored in t
  7625. * \param [in] a unsigned long type of value stored in a
  7626. * \param [in] b unsigned long type of value stored in b
  7627. * \return value stored in long long type
  7628. */
  7629. __STATIC_FORCEINLINE long long __RV_SMALXDA(long long t, unsigned long a, unsigned long b)
  7630. {
  7631. __ASM volatile("smalxda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  7632. return t;
  7633. }
  7634. /* ===== Inline Function End for 3.106.2. SMALXDA ===== */
  7635. /* ===== Inline Function Start for 3.107.1. SMALDS ===== */
  7636. /**
  7637. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
  7638. * \brief SMALDS (Signed Multiply Two Halfs & Subtract & Add 64-bit)
  7639. * \details
  7640. * **Type**: DSP (64-bit Profile)
  7641. *
  7642. * **Syntax**:\n
  7643. * ~~~
  7644. * SMALDS Rd, Rs1, Rs2
  7645. * SMALDRS Rd, Rs1, Rs2
  7646. * SMALXDS Rd, Rs1, Rs2
  7647. * ~~~
  7648. *
  7649. * **Purpose**:\n
  7650. * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
  7651. * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to
  7652. * the 64-bit value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is
  7653. * written back to the register-pair.
  7654. * * SMALDS: rt pair + (top*top - bottom*bottom) (all 32-bit elements)
  7655. * * SMALDRS: rt pair + (bottom*bottom - top*top) (all 32-bit elements)
  7656. * * SMALXDS: rt pair + (top*bottom - bottom*top) (all 32-bit elements)
  7657. *
  7658. * **RV32 Description**:\n
  7659. * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
  7660. * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
  7661. * Rs1 with the top 16-bit content of Rs2.
  7662. * For the `SMALDRS` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content
  7663. * of Rs2 and then subtracts the result from the result of multiplying the bottom 16-bit content of Rs1
  7664. * with the bottom 16-bit content of Rs2.
  7665. * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit
  7666. * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
  7667. * Rs1 with the bottom 16-bit content of Rs2.
  7668. * The subtraction result is then added to the 64-bit value of an even/odd pair of registers specified by
  7669. * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and
  7670. * Rs2, and the 64-bit value of the register-pair are treated as signed integers.
  7671. * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
  7672. * includes register 2d and 2d+1.
  7673. * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
  7674. * register of the pair contains the low 32-bit of the operand.
  7675. *
  7676. * **RV64 Description**:\n
  7677. * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  7678. * with the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
  7679. * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content
  7680. * of the 32-bit elements of Rs2.
  7681. * For the `SMALDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
  7682. * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of
  7683. * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of
  7684. * the 32-bit elements of Rs2.
  7685. * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  7686. * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
  7687. * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit
  7688. * content of the 32-bit elements of Rs2.
  7689. * The subtraction results are then added to the 64-bit value of Rd. The 64-bit addition result is written
  7690. * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed
  7691. * integers.
  7692. *
  7693. * **Operations**:\n
  7694. * ~~~
   * RV32:
   * Mres[31:0] = (Rs1.H[1] * Rs2.H[1]) - (Rs1.H[0] * Rs2.H[0]); // SMALDS
   * Mres[31:0] = (Rs1.H[0] * Rs2.H[0]) - (Rs1.H[1] * Rs2.H[1]); // SMALDRS
   * Mres[31:0] = (Rs1.H[1] * Rs2.H[0]) - (Rs1.H[0] * Rs2.H[1]); // SMALXDS
   * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
   * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]);
   * RV64:
   * // SMALDS
   * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]) - (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
   * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]) - (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
   * // SMALDRS
   * Mres[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]) - (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
   * Mres[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]) - (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
   * // SMALXDS
   * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]) - (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
   * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]) - (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
   * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
  7712. * ~~~
  7713. *
  7714. * \param [in] t long long type of value stored in t
  7715. * \param [in] a unsigned long type of value stored in a
  7716. * \param [in] b unsigned long type of value stored in b
  7717. * \return value stored in long long type
  7718. */
  7719. __STATIC_FORCEINLINE long long __RV_SMALDS(long long t, unsigned long a, unsigned long b)
  7720. {
  7721. __ASM volatile("smalds %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  7722. return t;
  7723. }
  7724. /* ===== Inline Function End for 3.107.1. SMALDS ===== */
  7725. /* ===== Inline Function Start for 3.107.2. SMALDRS ===== */
  7726. /**
  7727. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
   * \brief SMALDRS (Signed Multiply Two Halfs & Reverse Subtract & Add 64-bit)
  7729. * \details
  7730. * **Type**: DSP (64-bit Profile)
  7731. *
  7732. * **Syntax**:\n
  7733. * ~~~
  7734. * SMALDS Rd, Rs1, Rs2
  7735. * SMALDRS Rd, Rs1, Rs2
  7736. * SMALXDS Rd, Rs1, Rs2
  7737. * ~~~
  7738. *
  7739. * **Purpose**:\n
  7740. * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
  7741. * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to
  7742. * the 64-bit value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is
  7743. * written back to the register-pair.
  7744. * * SMALDS: rt pair + (top*top - bottom*bottom) (all 32-bit elements)
  7745. * * SMALDRS: rt pair + (bottom*bottom - top*top) (all 32-bit elements)
  7746. * * SMALXDS: rt pair + (top*bottom - bottom*top) (all 32-bit elements)
  7747. *
  7748. * **RV32 Description**:\n
  7749. * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
  7750. * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
  7751. * Rs1 with the top 16-bit content of Rs2.
  7752. * For the `SMALDRS` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content
  7753. * of Rs2 and then subtracts the result from the result of multiplying the bottom 16-bit content of Rs1
  7754. * with the bottom 16-bit content of Rs2.
  7755. * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit
  7756. * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
  7757. * Rs1 with the bottom 16-bit content of Rs2.
  7758. * The subtraction result is then added to the 64-bit value of an even/odd pair of registers specified by
  7759. * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and
  7760. * Rs2, and the 64-bit value of the register-pair are treated as signed integers.
  7761. * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
  7762. * includes register 2d and 2d+1.
  7763. * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
  7764. * register of the pair contains the low 32-bit of the operand.
  7765. *
  7766. * **RV64 Description**:\n
  7767. * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  7768. * with the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
  7769. * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content
  7770. * of the 32-bit elements of Rs2.
  7771. * For the `SMALDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
  7772. * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of
  7773. * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of
  7774. * the 32-bit elements of Rs2.
  7775. * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  7776. * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
  7777. * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit
  7778. * content of the 32-bit elements of Rs2.
  7779. * The subtraction results are then added to the 64-bit value of Rd. The 64-bit addition result is written
  7780. * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed
  7781. * integers.
  7782. *
  7783. * **Operations**:\n
  7784. * ~~~
   * RV32:
   * Mres[31:0] = (Rs1.H[1] * Rs2.H[1]) - (Rs1.H[0] * Rs2.H[0]); // SMALDS
   * Mres[31:0] = (Rs1.H[0] * Rs2.H[0]) - (Rs1.H[1] * Rs2.H[1]); // SMALDRS
   * Mres[31:0] = (Rs1.H[1] * Rs2.H[0]) - (Rs1.H[0] * Rs2.H[1]); // SMALXDS
   * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
   * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]);
   * RV64:
   * // SMALDS
   * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]) - (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
   * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]) - (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
   * // SMALDRS
   * Mres[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]) - (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
   * Mres[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]) - (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
   * // SMALXDS
   * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]) - (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
   * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]) - (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
   * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
  7802. * ~~~
  7803. *
  7804. * \param [in] t long long type of value stored in t
  7805. * \param [in] a unsigned long type of value stored in a
  7806. * \param [in] b unsigned long type of value stored in b
  7807. * \return value stored in long long type
  7808. */
  7809. __STATIC_FORCEINLINE long long __RV_SMALDRS(long long t, unsigned long a, unsigned long b)
  7810. {
  7811. __ASM volatile("smaldrs %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  7812. return t;
  7813. }
  7814. /* ===== Inline Function End for 3.107.2. SMALDRS ===== */
  7815. /* ===== Inline Function Start for 3.107.3. SMALXDS ===== */
  7816. /**
  7817. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
  * \brief SMALXDS (Signed Crossed Multiply Two Halfs & Subtract & Add 64-bit)
  7819. * \details
  7820. * **Type**: DSP (64-bit Profile)
  7821. *
  7822. * **Syntax**:\n
  7823. * ~~~
  7824. * SMALDS Rd, Rs1, Rs2
  7825. * SMALDRS Rd, Rs1, Rs2
  7826. * SMALXDS Rd, Rs1, Rs2
  7827. * ~~~
  7828. *
  7829. * **Purpose**:\n
  7830. * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
  7831. * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to
  7832. * the 64-bit value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is
  7833. * written back to the register-pair.
  7834. * * SMALDS: rt pair + (top*top - bottom*bottom) (all 32-bit elements)
  7835. * * SMALDRS: rt pair + (bottom*bottom - top*top) (all 32-bit elements)
  7836. * * SMALXDS: rt pair + (top*bottom - bottom*top) (all 32-bit elements)
  7837. *
  7838. * **RV32 Description**:\n
  7839. * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
  7840. * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
  7841. * Rs1 with the top 16-bit content of Rs2.
  7842. * For the `SMALDRS` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content
  7843. * of Rs2 and then subtracts the result from the result of multiplying the bottom 16-bit content of Rs1
  7844. * with the bottom 16-bit content of Rs2.
  7845. * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit
  7846. * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
  7847. * Rs1 with the bottom 16-bit content of Rs2.
  7848. * The subtraction result is then added to the 64-bit value of an even/odd pair of registers specified by
  7849. * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and
  7850. * Rs2, and the 64-bit value of the register-pair are treated as signed integers.
  7851. * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
  7852. * includes register 2d and 2d+1.
  7853. * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
  7854. * register of the pair contains the low 32-bit of the operand.
  7855. *
  7856. * **RV64 Description**:\n
  7857. * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  7858. * with the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
  7859. * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content
  7860. * of the 32-bit elements of Rs2.
  7861. * For the `SMALDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
  7862. * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of
  7863. * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of
  7864. * the 32-bit elements of Rs2.
  7865. * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  7866. * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
  7867. * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit
  7868. * content of the 32-bit elements of Rs2.
  7869. * The subtraction results are then added to the 64-bit value of Rd. The 64-bit addition result is written
  7870. * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed
  7871. * integers.
  7872. *
  7873. * **Operations**:\n
  7874. * ~~~
  7875. * * RV32:
  7876. * Mres[31:0] = (Rs1.H[1] * Rs2.H[1]) - (Rs1.H[0] * Rs2.H[0]); // SMALDS
  7877. * Mres[31:0] = (Rs1.H[0] * Rs2.H[0]) - (Rs1.H[1] * Rs2.H[1]); // SMALDRS
  7878. * Mres[31:0] = (Rs1.H[1] * Rs2.H[0]) - (Rs1.H[0] * Rs2.H[1]); // SMALXDS
  7879. * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
  7880. * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]);
  7881. * * RV64:
  7882. * // SMALDS
  7883. * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]) - (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
  * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]) - (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
  7885. * // SMALDRS
  7886. * Mres[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]) - (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
  * Mres[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]) - (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
  7888. * // SMALXDS
  7889. * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]) - (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
  * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]) - (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
  7891. * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
  7892. * ~~~
  7893. *
  7894. * \param [in] t long long type of value stored in t
  7895. * \param [in] a unsigned long type of value stored in a
  7896. * \param [in] b unsigned long type of value stored in b
  7897. * \return value stored in long long type
  7898. */
  7899. __STATIC_FORCEINLINE long long __RV_SMALXDS(long long t, unsigned long a, unsigned long b)
  7900. {
  7901. __ASM volatile("smalxds %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  7902. return t;
  7903. }
  7904. /* ===== Inline Function End for 3.107.3. SMALXDS ===== */
  7905. /* ===== Inline Function Start for 3.108. SMAR64 ===== */
  7906. /**
  7907. * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
  7908. * \brief SMAR64 (Signed Multiply and Add to 64-Bit Data)
  7909. * \details
  7910. * **Type**: DSP (64-bit Profile)
  7911. *
  7912. * **Syntax**:\n
  7913. * ~~~
  7914. * SMAR64 Rd, Rs1, Rs2
  7915. * ~~~
  7916. *
  7917. * **Purpose**:\n
  7918. * Multiply the 32-bit signed elements in two registers and add the 64-bit multiplication
  7919. * result to the 64-bit signed data of a pair of registers (RV32) or a register (RV64). The result is written
  7920. * back to the pair of registers (RV32) or a register (RV64).
  7921. *
  7922. * **RV32 Description**:\n
  7923. * This instruction multiplies the 32-bit signed data of Rs1 with that of Rs2. It adds
  7924. * the 64-bit multiplication result to the 64-bit signed data of an even/odd pair of registers specified by
  7925. * Rd(4,1). The addition result is written back to the even/odd pair of registers specified by Rd(4,1).
  * Rd(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
  7927. * includes register 2d and 2d+1.
  7928. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
  7929. * of the pair contains the low 32-bit of the result.
  7930. *
  7931. * **RV64 Description**:\n
  7932. * This instruction multiplies the 32-bit signed elements of Rs1 with that of Rs2. It
  7933. * adds the 64-bit multiplication results to the 64-bit signed data of Rd. The addition result is written
  7934. * back to Rd.
  7935. *
  7936. * **Operations**:\n
  7937. * ~~~
  7938. * * RV32:
  7939. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  7940. * R[t_H].R[t_L] = R[t_H].R[t_L] + (Rs1 * Rs2);
  7941. * * RV64:
  7942. * Rd = Rd + (Rs1.W[0] * Rs2.W[0]) + (Rs1.W[1] * Rs2.W[1]);
  7943. * ~~~
  7944. *
  7945. * \param [in] t long long type of value stored in t
  7946. * \param [in] a long type of value stored in a
  7947. * \param [in] b long type of value stored in b
  7948. * \return value stored in long long type
  7949. */
  7950. __STATIC_FORCEINLINE long long __RV_SMAR64(long long t, long a, long b)
  7951. {
  7952. __ASM volatile("smar64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  7953. return t;
  7954. }
  7955. /* ===== Inline Function End for 3.108. SMAR64 ===== */
  7956. /* ===== Inline Function Start for 3.109. SMAQA ===== */
  7957. /**
  7958. * \ingroup NMSIS_Core_DSP_Intrinsic_8B_MULT_32B_ADD
  7959. * \brief SMAQA (Signed Multiply Four Bytes with 32-bit Adds)
  7960. * \details
  7961. * **Type**: Partial-SIMD (Reduction)
  7962. *
  7963. * **Syntax**:\n
  7964. * ~~~
  7965. * SMAQA Rd, Rs1, Rs2
  7966. * ~~~
  7967. *
  7968. * **Purpose**:\n
  7969. * Do four signed 8-bit multiplications from 32-bit chunks of two registers; and then adds
  7970. * the four 16-bit results and the content of corresponding 32-bit chunks of a third register together.
  7971. *
  7972. * **Description**:\n
  7973. * This instruction multiplies the four signed 8-bit elements of 32-bit chunks of Rs1 with the four
  7974. * signed 8-bit elements of 32-bit chunks of Rs2 and then adds the four results together with the signed
  7975. * content of the corresponding 32-bit chunks of Rd. The final results are written back to the
  7976. * corresponding 32-bit chunks in Rd.
  7977. *
  7978. * **Operations**:\n
  7979. * ~~~
  7980. * res[x] = Rd.W[x] +
  7981. * (Rs1.W[x].B[3] s* Rs2.W[x].B[3]) + (Rs1.W[x].B[2] s* Rs2.W[x].B[2]) +
  7982. * (Rs1.W[x].B[1] s* Rs2.W[x].B[1]) + (Rs1.W[x].B[0] s* Rs2.W[x].B[0]);
  7983. * Rd.W[x] = res[x];
  7984. * for RV32: x=0,
  * for RV64: x=1...0
  7986. * ~~~
  7987. *
  7988. * \param [in] t long type of value stored in t
  7989. * \param [in] a unsigned long type of value stored in a
  7990. * \param [in] b unsigned long type of value stored in b
  7991. * \return value stored in long type
  7992. */
  7993. __STATIC_FORCEINLINE long __RV_SMAQA(long t, unsigned long a, unsigned long b)
  7994. {
  7995. __ASM volatile("smaqa %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  7996. return t;
  7997. }
  7998. /* ===== Inline Function End for 3.109. SMAQA ===== */
  7999. /* ===== Inline Function Start for 3.110. SMAQA.SU ===== */
  8000. /**
  8001. * \ingroup NMSIS_Core_DSP_Intrinsic_8B_MULT_32B_ADD
  8002. * \brief SMAQA.SU (Signed and Unsigned Multiply Four Bytes with 32-bit Adds)
  8003. * \details
  8004. * **Type**: Partial-SIMD (Reduction)
  8005. *
  8006. * **Syntax**:\n
  8007. * ~~~
  8008. * SMAQA.SU Rd, Rs1, Rs2
  8009. * ~~~
  8010. *
  8011. * **Purpose**:\n
  8012. * Do four `signed x unsigned` 8-bit multiplications from 32-bit chunks of two registers; and
  8013. * then adds the four 16-bit results and the content of corresponding 32-bit chunks of a third register
  8014. * together.
  8015. *
  8016. * **Description**:\n
  8017. * This instruction multiplies the four signed 8-bit elements of 32-bit chunks of Rs1 with the four
  8018. * unsigned 8-bit elements of 32-bit chunks of Rs2 and then adds the four results together with the
  8019. * signed content of the corresponding 32-bit chunks of Rd. The final results are written back to the
  8020. * corresponding 32-bit chunks in Rd.
  8021. *
  8022. * **Operations**:\n
  8023. * ~~~
  8024. * res[x] = Rd.W[x] +
  8025. * (Rs1.W[x].B[3] su* Rs2.W[x].B[3]) + (Rs1.W[x].B[2] su* Rs2.W[x].B[2]) +
  8026. * (Rs1.W[x].B[1] su* Rs2.W[x].B[1]) + (Rs1.W[x].B[0] su* Rs2.W[x].B[0]);
  8027. * Rd.W[x] = res[x];
  8028. * for RV32: x=0,
  8029. * for RV64: x=1...0
  8030. * ~~~
  8031. *
  8032. * \param [in] t long type of value stored in t
  8033. * \param [in] a unsigned long type of value stored in a
  8034. * \param [in] b unsigned long type of value stored in b
  8035. * \return value stored in long type
  8036. */
  8037. __STATIC_FORCEINLINE long __RV_SMAQA_SU(long t, unsigned long a, unsigned long b)
  8038. {
  8039. __ASM volatile("smaqa.su %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  8040. return t;
  8041. }
  8042. /* ===== Inline Function End for 3.110. SMAQA.SU ===== */
  8043. /* ===== Inline Function Start for 3.111. SMAX8 ===== */
  8044. /**
  8045. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
  8046. * \brief SMAX8 (SIMD 8-bit Signed Maximum)
  8047. * \details
  8048. * **Type**: SIMD
  8049. *
  8050. * **Syntax**:\n
  8051. * ~~~
  8052. * SMAX8 Rd, Rs1, Rs2
  8053. * ~~~
  8054. *
  8055. * **Purpose**:\n
  8056. * Do 8-bit signed integer elements finding maximum operations simultaneously.
  8057. *
  8058. * **Description**:\n
  8059. * This instruction compares the 8-bit signed integer elements in Rs1 with the 8-bit
  * signed integer elements in Rs2 and selects the number that is greater than the other one. The
  8061. * selected results are written to Rd.
  8062. *
  8063. * **Operations**:\n
  8064. * ~~~
  8065. * Rd.B[x] = (Rs1.B[x] > Rs2.B[x])? Rs1.B[x] : Rs2.B[x];
  8066. * for RV32: x=3...0,
  8067. * for RV64: x=7...0
  8068. * ~~~
  8069. *
  8070. * \param [in] a unsigned long type of value stored in a
  8071. * \param [in] b unsigned long type of value stored in b
  8072. * \return value stored in unsigned long type
  8073. */
  8074. __STATIC_FORCEINLINE unsigned long __RV_SMAX8(unsigned long a, unsigned long b)
  8075. {
  8076. unsigned long result;
  8077. __ASM volatile("smax8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  8078. return result;
  8079. }
  8080. /* ===== Inline Function End for 3.111. SMAX8 ===== */
  8081. /* ===== Inline Function Start for 3.112. SMAX16 ===== */
  8082. /**
  8083. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
  8084. * \brief SMAX16 (SIMD 16-bit Signed Maximum)
  8085. * \details
  8086. * **Type**: SIMD
  8087. *
  8088. * **Syntax**:\n
  8089. * ~~~
  8090. * SMAX16 Rd, Rs1, Rs2
  8091. * ~~~
  8092. *
  8093. * **Purpose**:\n
  8094. * Do 16-bit signed integer elements finding maximum operations simultaneously.
  8095. *
  8096. * **Description**:\n
  8097. * This instruction compares the 16-bit signed integer elements in Rs1 with the 16-bit
  * signed integer elements in Rs2 and selects the number that is greater than the other one. The
  8099. * selected results are written to Rd.
  8100. *
  8101. * **Operations**:\n
  8102. * ~~~
  8103. * Rd.H[x] = (Rs1.H[x] > Rs2.H[x])? Rs1.H[x] : Rs2.H[x];
  8104. * for RV32: x=1...0,
  8105. * for RV64: x=3...0
  8106. * ~~~
  8107. *
  8108. * \param [in] a unsigned long type of value stored in a
  8109. * \param [in] b unsigned long type of value stored in b
  8110. * \return value stored in unsigned long type
  8111. */
  8112. __STATIC_FORCEINLINE unsigned long __RV_SMAX16(unsigned long a, unsigned long b)
  8113. {
  8114. unsigned long result;
  8115. __ASM volatile("smax16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  8116. return result;
  8117. }
  8118. /* ===== Inline Function End for 3.112. SMAX16 ===== */
  8119. /* ===== Inline Function Start for 3.113.1. SMBB16 ===== */
  8120. /**
  8121. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
  8122. * \brief SMBB16 (SIMD Signed Multiply Bottom Half & Bottom Half)
  8123. * \details
  8124. * **Type**: SIMD
  8125. *
  8126. * **Syntax**:\n
  8127. * ~~~
  8128. * SMBB16 Rd, Rs1, Rs2
  8129. * SMBT16 Rd, Rs1, Rs2
  8130. * SMTT16 Rd, Rs1, Rs2
  8131. * ~~~
  8132. *
  8133. * **Purpose**:\n
  * Multiply the signed 16-bit content of the 32-bit elements of a register with the signed
  * 16-bit content of the 32-bit elements of another register and write the result to a third register.
  8136. * * SMBB16: W[x].bottom*W[x].bottom
  8137. * * SMBT16: W[x].bottom *W[x].top
  8138. * * SMTT16: W[x].top * W[x].top
  8139. *
  8140. * **Description**:\n
  8141. * For the `SMBB16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  8142. * with the bottom 16-bit content of the 32-bit elements of Rs2.
  8143. * For the `SMBT16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  8144. * with the top 16-bit content of the 32-bit elements of Rs2.
  8145. * For the `SMTT16` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
  8146. * the top 16-bit content of the 32-bit elements of Rs2.
  8147. * The multiplication results are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed
  8148. * integers.
  8149. *
  8150. * **Operations**:\n
  8151. * ~~~
  8152. * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[0]; // SMBB16
  8153. * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[1]; // SMBT16
  8154. * Rd.W[x] = Rs1.W[x].H[1] * Rs2.W[x].H[1]; // SMTT16
  8155. * for RV32: x=0,
  8156. * for RV64: x=1...0
  8157. * ~~~
  8158. *
  8159. * \param [in] a unsigned long type of value stored in a
  8160. * \param [in] b unsigned long type of value stored in b
  8161. * \return value stored in long type
  8162. */
  8163. __STATIC_FORCEINLINE long __RV_SMBB16(unsigned long a, unsigned long b)
  8164. {
  8165. long result;
  8166. __ASM volatile("smbb16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  8167. return result;
  8168. }
  8169. /* ===== Inline Function End for 3.113.1. SMBB16 ===== */
  8170. /* ===== Inline Function Start for 3.113.2. SMBT16 ===== */
  8171. /**
  8172. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
  8173. * \brief SMBT16 (SIMD Signed Multiply Bottom Half & Top Half)
  8174. * \details
  8175. * **Type**: SIMD
  8176. *
  8177. * **Syntax**:\n
  8178. * ~~~
  8179. * SMBB16 Rd, Rs1, Rs2
  8180. * SMBT16 Rd, Rs1, Rs2
  8181. * SMTT16 Rd, Rs1, Rs2
  8182. * ~~~
  8183. *
  8184. * **Purpose**:\n
  * Multiply the signed 16-bit content of the 32-bit elements of a register with the signed
  * 16-bit content of the 32-bit elements of another register and write the result to a third register.
  8187. * * SMBB16: W[x].bottom*W[x].bottom
  8188. * * SMBT16: W[x].bottom *W[x].top
  8189. * * SMTT16: W[x].top * W[x].top
  8190. *
  8191. * **Description**:\n
  8192. * For the `SMBB16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  8193. * with the bottom 16-bit content of the 32-bit elements of Rs2.
  8194. * For the `SMBT16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  8195. * with the top 16-bit content of the 32-bit elements of Rs2.
  8196. * For the `SMTT16` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
  8197. * the top 16-bit content of the 32-bit elements of Rs2.
  8198. * The multiplication results are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed
  8199. * integers.
  8200. *
  8201. * **Operations**:\n
  8202. * ~~~
  8203. * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[0]; // SMBB16
  8204. * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[1]; // SMBT16
  8205. * Rd.W[x] = Rs1.W[x].H[1] * Rs2.W[x].H[1]; // SMTT16
  8206. * for RV32: x=0,
  8207. * for RV64: x=1...0
  8208. * ~~~
  8209. *
  8210. * \param [in] a unsigned long type of value stored in a
  8211. * \param [in] b unsigned long type of value stored in b
  8212. * \return value stored in long type
  8213. */
  8214. __STATIC_FORCEINLINE long __RV_SMBT16(unsigned long a, unsigned long b)
  8215. {
  8216. long result;
  8217. __ASM volatile("smbt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  8218. return result;
  8219. }
  8220. /* ===== Inline Function End for 3.113.2. SMBT16 ===== */
  8221. /* ===== Inline Function Start for 3.113.3. SMTT16 ===== */
  8222. /**
  8223. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
  8224. * \brief SMTT16 (SIMD Signed Multiply Top Half & Top Half)
  8225. * \details
  8226. * **Type**: SIMD
  8227. *
  8228. * **Syntax**:\n
  8229. * ~~~
  8230. * SMBB16 Rd, Rs1, Rs2
  8231. * SMBT16 Rd, Rs1, Rs2
  8232. * SMTT16 Rd, Rs1, Rs2
  8233. * ~~~
  8234. *
  8235. * **Purpose**:\n
  * Multiply the signed 16-bit content of the 32-bit elements of a register with the signed
  * 16-bit content of the 32-bit elements of another register and write the result to a third register.
  8238. * * SMBB16: W[x].bottom*W[x].bottom
  8239. * * SMBT16: W[x].bottom *W[x].top
  8240. * * SMTT16: W[x].top * W[x].top
  8241. *
  8242. * **Description**:\n
  8243. * For the `SMBB16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  8244. * with the bottom 16-bit content of the 32-bit elements of Rs2.
  8245. * For the `SMBT16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  8246. * with the top 16-bit content of the 32-bit elements of Rs2.
  8247. * For the `SMTT16` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
  8248. * the top 16-bit content of the 32-bit elements of Rs2.
  8249. * The multiplication results are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed
  8250. * integers.
  8251. *
  8252. * **Operations**:\n
  8253. * ~~~
  8254. * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[0]; // SMBB16
  8255. * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[1]; // SMBT16
  8256. * Rd.W[x] = Rs1.W[x].H[1] * Rs2.W[x].H[1]; // SMTT16
  8257. * for RV32: x=0,
  8258. * for RV64: x=1...0
  8259. * ~~~
  8260. *
  8261. * \param [in] a unsigned long type of value stored in a
  8262. * \param [in] b unsigned long type of value stored in b
  8263. * \return value stored in long type
  8264. */
  8265. __STATIC_FORCEINLINE long __RV_SMTT16(unsigned long a, unsigned long b)
  8266. {
  8267. long result;
  8268. __ASM volatile("smtt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  8269. return result;
  8270. }
  8271. /* ===== Inline Function End for 3.113.3. SMTT16 ===== */
  8272. /* ===== Inline Function Start for 3.114.1. SMDS ===== */
  8273. /**
  8274. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
  8275. * \brief SMDS (SIMD Signed Multiply Two Halfs and Subtract)
  8276. * \details
  8277. * **Type**: SIMD
  8278. *
  8279. * **Syntax**:\n
  8280. * ~~~
  8281. * SMDS Rd, Rs1, Rs2
  8282. * SMDRS Rd, Rs1, Rs2
  8283. * SMXDS Rd, Rs1, Rs2
  8284. * ~~~
  8285. *
  8286. * **Purpose**:\n
  8287. * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
  8288. * perform a subtraction operation between the two 32-bit results.
  8289. * * SMDS: top*top - bottom*bottom (per 32-bit element)
  8290. * * SMDRS: bottom*bottom - top*top (per 32-bit element)
  8291. * * SMXDS: top*bottom - bottom*top (per 32-bit element)
  8292. *
  8293. * **Description**:\n
  8294. * For the `SMDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with
  8295. * the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result
  8296. * of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the
  8297. * 32-bit elements of Rs2.
  8298. * For the `SMDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
  8299. * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of
  8300. * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of
  8301. * the 32-bit elements of Rs2.
  8302. * For the `SMXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  8303. * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
  8304. * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit
  8305. * content of the 32-bit elements of Rs2.
  8306. * The subtraction result is written to the corresponding 32-bit element of Rd. The 16-bit contents of
  8307. * multiplication are treated as signed integers.
  8308. *
  8309. * **Operations**:\n
  8310. * ~~~
  8311. * * SMDS:
  8312. * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
  8313. * * SMDRS:
  8314. * Rd.W[x] = (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]);
  8315. * * SMXDS:
  8316. * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
  8317. * ~~~
  8318. *
  8319. * \param [in] a unsigned long type of value stored in a
  8320. * \param [in] b unsigned long type of value stored in b
  8321. * \return value stored in long type
  8322. */
  8323. __STATIC_FORCEINLINE long __RV_SMDS(unsigned long a, unsigned long b)
  8324. {
  8325. long result;
  8326. __ASM volatile("smds %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  8327. return result;
  8328. }
  8329. /* ===== Inline Function End for 3.114.1. SMDS ===== */
  8330. /* ===== Inline Function Start for 3.114.2. SMDRS ===== */
  8331. /**
  8332. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
  8333. * \brief SMDRS (SIMD Signed Multiply Two Halfs and Reverse Subtract)
  8334. * \details
  8335. * **Type**: SIMD
  8336. *
  8337. * **Syntax**:\n
  8338. * ~~~
  8339. * SMDS Rd, Rs1, Rs2
  8340. * SMDRS Rd, Rs1, Rs2
  8341. * SMXDS Rd, Rs1, Rs2
  8342. * ~~~
  8343. *
  8344. * **Purpose**:\n
  8345. * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
  8346. * perform a subtraction operation between the two 32-bit results.
  8347. * * SMDS: top*top - bottom*bottom (per 32-bit element)
  8348. * * SMDRS: bottom*bottom - top*top (per 32-bit element)
  8349. * * SMXDS: top*bottom - bottom*top (per 32-bit element)
  8350. *
  8351. * **Description**:\n
  8352. * For the `SMDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with
  8353. * the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result
  8354. * of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the
  8355. * 32-bit elements of Rs2.
  8356. * For the `SMDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
  8357. * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of
  8358. * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of
  8359. * the 32-bit elements of Rs2.
  8360. * For the `SMXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  8361. * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
  8362. * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit
  8363. * content of the 32-bit elements of Rs2.
  8364. * The subtraction result is written to the corresponding 32-bit element of Rd. The 16-bit contents of
  8365. * multiplication are treated as signed integers.
  8366. *
  8367. * **Operations**:\n
  8368. * ~~~
  8369. * * SMDS:
  8370. * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
  8371. * * SMDRS:
  8372. * Rd.W[x] = (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]);
  8373. * * SMXDS:
  8374. * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
  8375. * ~~~
  8376. *
  8377. * \param [in] a unsigned long type of value stored in a
  8378. * \param [in] b unsigned long type of value stored in b
  8379. * \return value stored in long type
  8380. */
  8381. __STATIC_FORCEINLINE long __RV_SMDRS(unsigned long a, unsigned long b)
  8382. {
  8383. long result;
  8384. __ASM volatile("smdrs %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  8385. return result;
  8386. }
  8387. /* ===== Inline Function End for 3.114.2. SMDRS ===== */
  8388. /* ===== Inline Function Start for 3.114.3. SMXDS ===== */
  8389. /**
  8390. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
  8391. * \brief SMXDS (SIMD Signed Crossed Multiply Two Halfs and Subtract)
  8392. * \details
  8393. * **Type**: SIMD
  8394. *
  8395. * **Syntax**:\n
  8396. * ~~~
  8397. * SMDS Rd, Rs1, Rs2
  8398. * SMDRS Rd, Rs1, Rs2
  8399. * SMXDS Rd, Rs1, Rs2
  8400. * ~~~
  8401. *
  8402. * **Purpose**:\n
  8403. * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
  8404. * perform a subtraction operation between the two 32-bit results.
  8405. * * SMDS: top*top - bottom*bottom (per 32-bit element)
  8406. * * SMDRS: bottom*bottom - top*top (per 32-bit element)
  8407. * * SMXDS: top*bottom - bottom*top (per 32-bit element)
  8408. *
  8409. * **Description**:\n
  8410. * For the `SMDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with
  8411. * the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result
  8412. * of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the
  8413. * 32-bit elements of Rs2.
  8414. * For the `SMDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
  8415. * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of
  8416. * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of
  8417. * the 32-bit elements of Rs2.
  8418. * For the `SMXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  8419. * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
  8420. * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit
  8421. * content of the 32-bit elements of Rs2.
  8422. * The subtraction result is written to the corresponding 32-bit element of Rd. The 16-bit contents of
  8423. * multiplication are treated as signed integers.
  8424. *
  8425. * **Operations**:\n
  8426. * ~~~
  8427. * * SMDS:
  8428. * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
  8429. * * SMDRS:
  8430. * Rd.W[x] = (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]);
  8431. * * SMXDS:
  8432. * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
  8433. * ~~~
  8434. *
  8435. * \param [in] a unsigned long type of value stored in a
  8436. * \param [in] b unsigned long type of value stored in b
  8437. * \return value stored in long type
  8438. */
  8439. __STATIC_FORCEINLINE long __RV_SMXDS(unsigned long a, unsigned long b)
  8440. {
  8441. long result;
  8442. __ASM volatile("smxds %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  8443. return result;
  8444. }
  8445. /* ===== Inline Function End for 3.114.3. SMXDS ===== */
  8446. /* ===== Inline Function Start for 3.115. SMIN8 ===== */
  8447. /**
  8448. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
  8449. * \brief SMIN8 (SIMD 8-bit Signed Minimum)
  8450. * \details
  8451. * **Type**: SIMD
  8452. *
  8453. * **Syntax**:\n
  8454. * ~~~
  8455. * SMIN8 Rd, Rs1, Rs2
  8456. * ~~~
  8457. *
  8458. * **Purpose**:\n
  8459. * Do 8-bit signed integer elements finding minimum operations simultaneously.
  8460. *
  8461. * **Description**:\n
  8462. * This instruction compares the 8-bit signed integer elements in Rs1 with the 8-bit
  * signed integer elements in Rs2 and selects the number that is less than the other one. The selected
  8464. * results are written to Rd.
  8465. *
  8466. * **Operations**:\n
  8467. * ~~~
  8468. * Rd.B[x] = (Rs1.B[x] < Rs2.B[x])? Rs1.B[x] : Rs2.B[x];
  8469. * for RV32: x=3...0,
  8470. * for RV64: x=7...0
  8471. * ~~~
  8472. *
  8473. * \param [in] a unsigned long type of value stored in a
  8474. * \param [in] b unsigned long type of value stored in b
  8475. * \return value stored in unsigned long type
  8476. */
  8477. __STATIC_FORCEINLINE unsigned long __RV_SMIN8(unsigned long a, unsigned long b)
  8478. {
  8479. unsigned long result;
  8480. __ASM volatile("smin8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  8481. return result;
  8482. }
  8483. /* ===== Inline Function End for 3.115. SMIN8 ===== */
  8484. /* ===== Inline Function Start for 3.116. SMIN16 ===== */
  8485. /**
  8486. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
  8487. * \brief SMIN16 (SIMD 16-bit Signed Minimum)
  8488. * \details
  8489. * **Type**: SIMD
  8490. *
  8491. * **Syntax**:\n
  8492. * ~~~
  8493. * SMIN16 Rd, Rs1, Rs2
  8494. * ~~~
  8495. *
  8496. * **Purpose**:\n
  8497. * Do 16-bit signed integer elements finding minimum operations simultaneously.
  8498. *
  8499. * **Description**:\n
  8500. * This instruction compares the 16-bit signed integer elements in Rs1 with the 16-bit
  8501. * signed integer elements in Rs2 and selects the number that is less than the other one. The selected
  8502. * results are written to Rd.
  8503. *
  8504. * **Operations**:\n
  8505. * ~~~
  8506. * Rd.H[x] = (Rs1.H[x] < Rs2.H[x])? Rs1.H[x] : Rs2.H[x];
  8507. * for RV32: x=1...0,
  8508. * for RV64: x=3...0
  8509. * ~~~
  8510. *
  8511. * \param [in] a unsigned long type of value stored in a
  8512. * \param [in] b unsigned long type of value stored in b
  8513. * \return value stored in unsigned long type
  8514. */
  8515. __STATIC_FORCEINLINE unsigned long __RV_SMIN16(unsigned long a, unsigned long b)
  8516. {
  8517. unsigned long result;
  8518. __ASM volatile("smin16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  8519. return result;
  8520. }
  8521. /* ===== Inline Function End for 3.116. SMIN16 ===== */
  8522. /* ===== Inline Function Start for 3.117.1. SMMUL ===== */
  8523. /**
  8524. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
  8525. * \brief SMMUL (SIMD MSW Signed Multiply Word)
  8526. * \details
  8527. * **Type**: SIMD
  8528. *
  8529. * **Syntax**:\n
  8530. * ~~~
  8531. * SMMUL Rd, Rs1, Rs2
  8532. * SMMUL.u Rd, Rs1, Rs2
  8533. * ~~~
  8534. *
  8535. * **Purpose**:\n
  8536. * Multiply the 32-bit signed integer elements of two registers and write the most significant
  8537. * 32-bit results to the corresponding 32-bit elements of a register. The `.u` form performs an
  8538. * additional rounding up operation on the multiplication results before taking the most significant
  8539. * 32-bit part of the results.
  8540. *
  8541. * **Description**:\n
  8542. * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2 and writes the
  8543. * most significant 32-bit multiplication results to the corresponding 32-bit elements of Rd. The 32-bit
  8544. * elements of Rs1 and Rs2 are treated as signed integers. The `.u` form of the instruction rounds up
  8545. * the most significant 32-bit of the 64-bit multiplication results by adding a 1 to bit 31 of the results.
  8546. * * For `smmul/RV32` instruction, it is an alias to `mulh/RV32` instruction.
  8547. *
  8548. * **Operations**:\n
  8549. * ~~~
  8550. * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
  8551. * if (`.u` form) {
  8552. * Round[x][32:0] = Mres[x][63:31] + 1;
  8553. * Rd.W[x] = Round[x][32:1];
  8554. * } else {
  8555. * Rd.W[x] = Mres[x][63:32];
  8556. * }
  8557. * for RV32: x=0
  8558. * for RV64: x=1...0
  8559. * ~~~
  8560. *
  8561. * \param [in] a long type of value stored in a
  8562. * \param [in] b long type of value stored in b
  8563. * \return value stored in long type
  8564. */
  8565. __STATIC_FORCEINLINE long __RV_SMMUL(long a, long b)
  8566. {
  8567. long result;
  8568. __ASM volatile("smmul %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  8569. return result;
  8570. }
  8571. /* ===== Inline Function End for 3.117.1. SMMUL ===== */
  8572. /* ===== Inline Function Start for 3.117.2. SMMUL.u ===== */
  8573. /**
  8574. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
  8575. * \brief SMMUL.u (SIMD MSW Signed Multiply Word with Rounding)
  8576. * \details
  8577. * **Type**: SIMD
  8578. *
  8579. * **Syntax**:\n
  8580. * ~~~
  8581. * SMMUL Rd, Rs1, Rs2
  8582. * SMMUL.u Rd, Rs1, Rs2
  8583. * ~~~
  8584. *
  8585. * **Purpose**:\n
  8586. * Multiply the 32-bit signed integer elements of two registers and write the most significant
  8587. * 32-bit results to the corresponding 32-bit elements of a register. The `.u` form performs an
  8588. * additional rounding up operation on the multiplication results before taking the most significant
  8589. * 32-bit part of the results.
  8590. *
  8591. * **Description**:\n
  8592. * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2 and writes the
  8593. * most significant 32-bit multiplication results to the corresponding 32-bit elements of Rd. The 32-bit
  8594. * elements of Rs1 and Rs2 are treated as signed integers. The `.u` form of the instruction rounds up
  8595. * the most significant 32-bit of the 64-bit multiplication results by adding a 1 to bit 31 of the results.
  8596. * * For `smmul/RV32` instruction, it is an alias to `mulh/RV32` instruction.
  8597. *
  8598. * **Operations**:\n
  8599. * ~~~
  8600. * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
  8601. * if (`.u` form) {
  8602. * Round[x][32:0] = Mres[x][63:31] + 1;
  8603. * Rd.W[x] = Round[x][32:1];
  8604. * } else {
  8605. * Rd.W[x] = Mres[x][63:32];
  8606. * }
  8607. * for RV32: x=0
  8608. * for RV64: x=1...0
  8609. * ~~~
  8610. *
  8611. * \param [in] a long type of value stored in a
  8612. * \param [in] b long type of value stored in b
  8613. * \return value stored in long type
  8614. */
  8615. __STATIC_FORCEINLINE long __RV_SMMUL_U(long a, long b)
  8616. {
  8617. long result;
  8618. __ASM volatile("smmul.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  8619. return result;
  8620. }
  8621. /* ===== Inline Function End for 3.117.2. SMMUL.u ===== */
  8622. /* ===== Inline Function Start for 3.118.1. SMMWB ===== */
  8623. /**
  8624. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
  8625. * \brief SMMWB (SIMD MSW Signed Multiply Word and Bottom Half)
  8626. * \details
  8627. * **Type**: SIMD
  8628. *
  8629. * **Syntax**:\n
  8630. * ~~~
  8631. * SMMWB Rd, Rs1, Rs2
  8632. * SMMWB.u Rd, Rs1, Rs2
  8633. * ~~~
  8634. *
  8635. * **Purpose**:\n
  8636. * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the
  8637. * corresponding 32-bit elements of another register, and write the most significant 32-bit results to
  8638. * the corresponding 32-bit elements of a register. The `.u` form rounds up the results from the most
  8639. * significant discarded bit.
  8640. *
  8641. * **Description**:\n
  8642. * This instruction multiplies the signed 32-bit elements of Rs1 with the signed bottom 16-bit content
  8643. * of the corresponding 32-bit elements of Rs2 and writes the most significant 32-bit multiplication
  8644. * results to the corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the
  8645. * most significant 32-bit of the 48-bit multiplication results by adding a 1 to bit 15 of the results.
  8646. *
  8647. * **Operations**:\n
  8648. * ~~~
  8649. * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[0];
  8650. * if (`.u` form) {
  8651. * Round[x][32:0] = Mres[x][47:15] + 1;
  8652. * Rd.W[x] = Round[x][32:1];
  8653. * } else {
  8654. * Rd.W[x] = Mres[x][47:16];
  8655. * }
  8656. * for RV32: x=0
  8657. * for RV64: x=1...0
  8658. * ~~~
  8659. *
  8660. * \param [in] a long type of value stored in a
  8661. * \param [in] b unsigned long type of value stored in b
  8662. * \return value stored in long type
  8663. */
  8664. __STATIC_FORCEINLINE long __RV_SMMWB(long a, unsigned long b)
  8665. {
  8666. long result;
  8667. __ASM volatile("smmwb %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  8668. return result;
  8669. }
  8670. /* ===== Inline Function End for 3.118.1. SMMWB ===== */
  8671. /* ===== Inline Function Start for 3.118.2. SMMWB.u ===== */
  8672. /**
  8673. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
  8674. * \brief SMMWB.u (SIMD MSW Signed Multiply Word and Bottom Half with Rounding)
  8675. * \details
  8676. * **Type**: SIMD
  8677. *
  8678. * **Syntax**:\n
  8679. * ~~~
  8680. * SMMWB Rd, Rs1, Rs2
  8681. * SMMWB.u Rd, Rs1, Rs2
  8682. * ~~~
  8683. *
  8684. * **Purpose**:\n
  8685. * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the
  8686. * corresponding 32-bit elements of another register, and write the most significant 32-bit results to
  8687. * the corresponding 32-bit elements of a register. The `.u` form rounds up the results from the most
  8688. * significant discarded bit.
  8689. *
  8690. * **Description**:\n
  8691. * This instruction multiplies the signed 32-bit elements of Rs1 with the signed bottom 16-bit content
  8692. * of the corresponding 32-bit elements of Rs2 and writes the most significant 32-bit multiplication
  8693. * results to the corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the
  8694. * most significant 32-bit of the 48-bit multiplication results by adding a 1 to bit 15 of the results.
  8695. *
  8696. * **Operations**:\n
  8697. * ~~~
  8698. * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[0];
  8699. * if (`.u` form) {
  8700. * Round[x][32:0] = Mres[x][47:15] + 1;
  8701. * Rd.W[x] = Round[x][32:1];
  8702. * } else {
  8703. * Rd.W[x] = Mres[x][47:16];
  8704. * }
  8705. * for RV32: x=0
  8706. * for RV64: x=1...0
  8707. * ~~~
  8708. *
  8709. * \param [in] a long type of value stored in a
  8710. * \param [in] b unsigned long type of value stored in b
  8711. * \return value stored in long type
  8712. */
  8713. __STATIC_FORCEINLINE long __RV_SMMWB_U(long a, unsigned long b)
  8714. {
  8715. long result;
  8716. __ASM volatile("smmwb.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  8717. return result;
  8718. }
  8719. /* ===== Inline Function End for 3.118.2. SMMWB.u ===== */
  8720. /* ===== Inline Function Start for 3.119.1. SMMWT ===== */
  8721. /**
  8722. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
  8723. * \brief SMMWT (SIMD MSW Signed Multiply Word and Top Half)
  8724. * \details
  8725. * **Type**: SIMD
  8726. *
  8727. * **Syntax**:\n
  8728. * ~~~
  8729. * SMMWT Rd, Rs1, Rs2
  8730. * SMMWT.u Rd, Rs1, Rs2
  8731. * ~~~
  8732. *
  8733. * **Purpose**:\n
  8734. * Multiply the signed 32-bit integer elements of one register and the top 16-bit of the
  8735. * corresponding 32-bit elements of another register, and write the most significant 32-bit results to
  8736. * the corresponding 32-bit elements of a register. The `.u` form rounds up the results from the most
  8737. * significant discarded bit.
  8738. *
  8739. * **Description**:\n
  8740. * This instruction multiplies the signed 32-bit elements of Rs1 with the top signed 16-bit content of
  8741. * the corresponding 32-bit elements of Rs2 and writes the most significant 32-bit multiplication
  8742. * results to the corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the
  8743. * most significant 32-bit of the 48-bit multiplication results by adding a 1 to bit 15 of the results.
  8744. *
  8745. * **Operations**:\n
  8746. * ~~~
  8747. * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[1];
  8748. * if (`.u` form) {
  8749. * Round[x][32:0] = Mres[x][47:15] + 1;
  8750. * Rd.W[x] = Round[x][32:1];
  8751. * } else {
  8752. * Rd.W[x] = Mres[x][47:16];
  8753. * }
  8754. * for RV32: x=0
  8755. * for RV64: x=1...0
  8756. * ~~~
  8757. *
  8758. * \param [in] a long type of value stored in a
  8759. * \param [in] b unsigned long type of value stored in b
  8760. * \return value stored in long type
  8761. */
  8762. __STATIC_FORCEINLINE long __RV_SMMWT(long a, unsigned long b)
  8763. {
  8764. long result;
  8765. __ASM volatile("smmwt %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  8766. return result;
  8767. }
  8768. /* ===== Inline Function End for 3.119.1. SMMWT ===== */
  8769. /* ===== Inline Function Start for 3.119.2. SMMWT.u ===== */
  8770. /**
  8771. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
  8772. * \brief SMMWT.u (SIMD MSW Signed Multiply Word and Top Half with Rounding)
  8773. * \details
  8774. * **Type**: SIMD
  8775. *
  8776. * **Syntax**:\n
  8777. * ~~~
  8778. * SMMWT Rd, Rs1, Rs2
  8779. * SMMWT.u Rd, Rs1, Rs2
  8780. * ~~~
  8781. *
  8782. * **Purpose**:\n
  8783. * Multiply the signed 32-bit integer elements of one register and the top 16-bit of the
  8784. * corresponding 32-bit elements of another register, and write the most significant 32-bit results to
  8785. * the corresponding 32-bit elements of a register. The `.u` form rounds up the results from the most
  8786. * significant discarded bit.
  8787. *
  8788. * **Description**:\n
  8789. * This instruction multiplies the signed 32-bit elements of Rs1 with the top signed 16-bit content of
  8790. * the corresponding 32-bit elements of Rs2 and writes the most significant 32-bit multiplication
  8791. * results to the corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the
  8792. * most significant 32-bit of the 48-bit multiplication results by adding a 1 to bit 15 of the results.
  8793. *
  8794. * **Operations**:\n
  8795. * ~~~
  8796. * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[1];
  8797. * if (`.u` form) {
  8798. * Round[x][32:0] = Mres[x][47:15] + 1;
  8799. * Rd.W[x] = Round[x][32:1];
  8800. * } else {
  8801. * Rd.W[x] = Mres[x][47:16];
  8802. * }
  8803. * for RV32: x=0
  8804. * for RV64: x=1...0
  8805. * ~~~
  8806. *
  8807. * \param [in] a long type of value stored in a
  8808. * \param [in] b unsigned long type of value stored in b
  8809. * \return value stored in long type
  8810. */
  8811. __STATIC_FORCEINLINE long __RV_SMMWT_U(long a, unsigned long b)
  8812. {
  8813. long result;
  8814. __ASM volatile("smmwt.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  8815. return result;
  8816. }
  8817. /* ===== Inline Function End for 3.119.2. SMMWT.u ===== */
  8818. /* ===== Inline Function Start for 3.120.1. SMSLDA ===== */
  8819. /**
  8820. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
  8821. * \brief SMSLDA (Signed Multiply Two Halfs & Add & Subtract 64-bit)
  8822. * \details
  8823. * **Type**: DSP (64-bit Profile)
  8824. *
  8825. * **Syntax**:\n
  8826. * ~~~
  8827. * SMSLDA Rd, Rs1, Rs2
  8828. * SMSLXDA Rd, Rs1, Rs2
  8829. * ~~~
  8830. *
  8831. * **Purpose**:\n
  8832. * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
  8833. * subtracts the two 32-bit results from the 64-bit value of an even/odd pair of registers (RV32) or a
  8834. * register (RV64). The subtraction result is written back to the register-pair.
  8835. * * SMSLDA: rd pair - top*top - bottom*bottom (all 32-bit elements)
  8836. * * SMSLXDA: rd pair - top*bottom - bottom*top (all 32-bit elements)
  8837. *
  8838. * **RV32 Description**:\n
  8839. * For the `SMSLDA` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
  8840. * content of Rs2 and multiplies the top 16-bit content of Rs1 with the top 16-bit content of Rs2.
  8841. * For the `SMSLXDA` instruction, it multiplies the top 16-bit content of Rs1 with the bottom 16-bit
  8842. * content of Rs2 and multiplies the bottom 16-bit content of Rs1 with the top 16-bit content of Rs2.
  8843. * The two multiplication results are subtracted from the 64-bit value of an even/odd pair of registers
  8844. * specified by Rd(4,1). The 64-bit subtraction result is written back to the register-pair. The 16-bit
  8845. * values of Rs1 and Rs2, and the 64-bit value of the register-pair are treated as signed integers.
  8846. * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
  8847. * includes register 2d and 2d+1.
  8848. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
  8849. * of the pair contains the low 32-bit of the result.
  8850. *
  8851. * **RV64 Description**:\n
  8852. * For the `SMSLDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  8853. * with the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of
  8854. * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2.
  8855. * For the `SMSLXDA` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
  8856. * the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the bottom 16-bit content of
  8857. * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2.
  8858. * The four multiplication results are subtracted from the 64-bit value of Rd. The 64-bit subtraction
  8859. * result is written back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated
  8860. * as signed integers.
  8861. *
  8862. * **Operations**:\n
  8863. * ~~~
  8864. * * RV32:
  8865. * // SMSLDA
  8866. * Mres0[31:0] = (Rs1.H[0] * Rs2.H[0]);
  8867. * Mres1[31:0] = (Rs1.H[1] * Rs2.H[1]);
  8868. * // SMSLXDA
  8869. * Mres0[31:0] = (Rs1.H[0] * Rs2.H[1]);
  8870. * Mres1[31:0] = (Rs1.H[1] * Rs2.H[0]);
  8871. * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
  8872. * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] - SE64(Mres0[31:0]) - SE64(Mres1[31:0]);
  8873. * * RV64:
  8874. * // SMSLDA
  8875. * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
  8876. * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
  8877. * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
  8878. * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
  8879. * // SMSLXDA
  8880. * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
  8881. * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]);
  8882. * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
  8883. * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]);
  8884. * Rd = Rd - SE64(Mres0[0][31:0]) - SE64(Mres1[0][31:0]) - SE64(Mres0[1][31:0]) -
  8885. * SE64(Mres1[1][31:0]);
  8886. * ~~~
  8887. *
  8888. * \param [in] t long long type of value stored in t
  8889. * \param [in] a unsigned long type of value stored in a
  8890. * \param [in] b unsigned long type of value stored in b
  8891. * \return value stored in long long type
  8892. */
  8893. __STATIC_FORCEINLINE long long __RV_SMSLDA(long long t, unsigned long a, unsigned long b)
  8894. {
  8895. __ASM volatile("smslda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  8896. return t;
  8897. }
  8898. /* ===== Inline Function End for 3.120.1. SMSLDA ===== */
  8899. /* ===== Inline Function Start for 3.120.2. SMSLXDA ===== */
  8900. /**
  8901. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
  8902. * \brief SMSLXDA (Signed Crossed Multiply Two Halfs & Add & Subtract 64-bit)
  8903. * \details
  8904. * **Type**: DSP (64-bit Profile)
  8905. *
  8906. * **Syntax**:\n
  8907. * ~~~
  8908. * SMSLDA Rd, Rs1, Rs2
  8909. * SMSLXDA Rd, Rs1, Rs2
  8910. * ~~~
  8911. *
  8912. * **Purpose**:\n
  8913. * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
  8914. * subtracts the two 32-bit results from the 64-bit value of an even/odd pair of registers (RV32) or a
  8915. * register (RV64). The subtraction result is written back to the register-pair.
  8916. * * SMSLDA: rd pair - top*top - bottom*bottom (all 32-bit elements)
  8917. * * SMSLXDA: rd pair - top*bottom - bottom*top (all 32-bit elements)
  8918. *
  8919. * **RV32 Description**:\n
  8920. * For the `SMSLDA` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
  8921. * content of Rs2 and multiplies the top 16-bit content of Rs1 with the top 16-bit content of Rs2.
  8922. * For the `SMSLXDA` instruction, it multiplies the top 16-bit content of Rs1 with the bottom 16-bit
  8923. * content of Rs2 and multiplies the bottom 16-bit content of Rs1 with the top 16-bit content of Rs2.
  8924. * The two multiplication results are subtracted from the 64-bit value of an even/odd pair of registers
  8925. * specified by Rd(4,1). The 64-bit subtraction result is written back to the register-pair. The 16-bit
  8926. * values of Rs1 and Rs2, and the 64-bit value of the register-pair are treated as signed integers.
  8927. * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
  8928. * includes register 2d and 2d+1.
  8929. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
  8930. * of the pair contains the low 32-bit of the result.
  8931. *
  8932. * **RV64 Description**:\n
  8933. * For the `SMSLDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  8934. * with the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of
  8935. * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2.
  8936. * For the `SMSLXDA` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
  8937. * the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the bottom 16-bit content of
  8938. * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2.
  8939. * The four multiplication results are subtracted from the 64-bit value of Rd. The 64-bit subtraction
  8940. * result is written back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated
  8941. * as signed integers.
  8942. *
  8943. * **Operations**:\n
  8944. * ~~~
  8945. * * RV32:
  8946. * // SMSLDA
  8947. * Mres0[31:0] = (Rs1.H[0] * Rs2.H[0]);
  8948. * Mres1[31:0] = (Rs1.H[1] * Rs2.H[1]);
  8949. * // SMSLXDA
  8950. * Mres0[31:0] = (Rs1.H[0] * Rs2.H[1]);
  8951. * Mres1[31:0] = (Rs1.H[1] * Rs2.H[0]);
  8952. * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
  8953. * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] - SE64(Mres0[31:0]) - SE64(Mres1[31:0]);
  8954. * * RV64:
  8955. * // SMSLDA
  8956. * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
  8957. * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
  8958. * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
  8959. * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
  8960. * // SMSLXDA
  8961. * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
  8962. * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]);
  8963. * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
  8964. * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]);
  8965. * Rd = Rd - SE64(Mres0[0][31:0]) - SE64(Mres1[0][31:0]) - SE64(Mres0[1][31:0]) -
  8966. * SE64(Mres1[1][31:0]);
  8967. * ~~~
  8968. *
  8969. * \param [in] t long long type of value stored in t
  8970. * \param [in] a unsigned long type of value stored in a
  8971. * \param [in] b unsigned long type of value stored in b
  8972. * \return value stored in long long type
  8973. */
  8974. __STATIC_FORCEINLINE long long __RV_SMSLXDA(long long t, unsigned long a, unsigned long b)
  8975. {
  8976. __ASM volatile("smslxda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  8977. return t;
  8978. }
  8979. /* ===== Inline Function End for 3.120.2. SMSLXDA ===== */
  8980. /* ===== Inline Function Start for 3.121. SMSR64 ===== */
  8981. /**
  8982. * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
  8983. * \brief SMSR64 (Signed Multiply and Subtract from 64-Bit Data)
  8984. * \details
  8985. * **Type**: DSP (64-bit Profile)
  8986. *
  8987. * **Syntax**:\n
  8988. * ~~~
  8989. * SMSR64 Rd, Rs1, Rs2
  8990. * ~~~
  8991. *
  8992. * **Purpose**:\n
  8993. * Multiply the 32-bit signed elements in two registers and subtract the 64-bit multiplication
  8994. * results from the 64-bit signed data of a pair of registers (RV32) or a register (RV64). The result is
  8995. * written back to the pair of registers (RV32) or a register (RV64).
  8996. *
  8997. * **RV32 Description**:\n
  8998. * This instruction multiplies the 32-bit signed data of Rs1 with that of Rs2. It
  8999. * subtracts the 64-bit multiplication result from the 64-bit signed data of an even/odd pair of registers
  9000. * specified by Rd(4,1). The subtraction result is written back to the even/odd pair of registers
  9001. * specified by Rd(4,1).
  9002. * Rd(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
  9003. * includes register 2d and 2d+1.
  9004. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
  9005. * of the pair contains the low 32-bit of the result.
  9006. *
  9007. * **RV64 Description**:\n
  9008. * This instruction multiplies the 32-bit signed elements of Rs1 with that of Rs2. It
  9009. * subtracts the 64-bit multiplication results from the 64-bit signed data of Rd. The subtraction result is
  9010. * written back to Rd.
  9011. *
  9012. * **Operations**:\n
  9013. * ~~~
  9014. * * RV32:
  9015. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  9016. * R[t_H].R[t_L] = R[t_H].R[t_L] - (Rs1 * Rs2);
  9017. * * RV64:
  9018. * Rd = Rd - (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]);
  9019. * ~~~
  9020. *
  9021. * \param [in] t long long type of value stored in t
  9022. * \param [in] a long type of value stored in a
  9023. * \param [in] b long type of value stored in b
  9024. * \return value stored in long long type
  9025. */
  9026. __STATIC_FORCEINLINE long long __RV_SMSR64(long long t, long a, long b)
  9027. {
  9028. __ASM volatile("smsr64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  9029. return t;
  9030. }
  9031. /* ===== Inline Function End for 3.121. SMSR64 ===== */
  9032. /* ===== Inline Function Start for 3.122.1. SMUL8 ===== */
  9033. /**
  9034. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY
  9035. * \brief SMUL8 (SIMD Signed 8-bit Multiply)
  9036. * \details
  9037. * **Type**: SIMD
  9038. *
  9039. * **Syntax**:\n
  9040. * ~~~
  9041. * SMUL8 Rd, Rs1, Rs2
  9042. * SMULX8 Rd, Rs1, Rs2
  9043. * ~~~
  9044. *
  9045. * **Purpose**:\n
  9046. * Do signed 8-bit multiplications and generate four 16-bit results simultaneously.
  9047. *
  9048. * **RV32 Description**:\n
  9049. * For the `SMUL8` instruction, multiply the 8-bit data elements of Rs1 with the
  9050. * corresponding 8-bit data elements of Rs2.
  9051. * For the `SMULX8` instruction, multiply the first and second 8-bit data elements of Rs1 with the
  9052. * second and first 8-bit data elements of Rs2. At the same time, multiply the third and fourth 8-bit data
  9053. * elements of Rs1 with the fourth and third 8-bit data elements of Rs2.
  9054. * The four 16-bit results are then written into an even/odd pair of registers specified by Rd(4,1).
  9055. * Rd(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
  9056. * includes register 2d and 2d+1.
  9057. * The odd `2d+1` register of the pair contains the two 16-bit results calculated from the top part of
  9058. * Rs1 and the even `2d` register of the pair contains the two 16-bit results calculated from the bottom
  9059. * part of Rs1.
  9060. *
  9061. * **RV64 Description**:\n
  9062. * For the `SMUL8` instruction, multiply the 8-bit data elements of Rs1 with the
  9063. * corresponding 8-bit data elements of Rs2.
  9064. * For the `SMULX8` instruction, multiply the first and second 8-bit data elements of Rs1 with the
  9065. * second and first 8-bit data elements of Rs2. At the same time, multiply the third and fourth 8-bit data
  9066. * elements of Rs1 with the fourth and third 8-bit data elements of Rs2.
  9067. * The four 16-bit results are then written into Rd. The Rd.W[1] contains the two 16-bit results
  9068. * calculated from the top part of Rs1 and the Rd.W[0] contains the two 16-bit results calculated from
  9069. * the bottom part of Rs1.
  9070. *
  9071. * **Operations**:\n
  9072. * ~~~
  9073. * * RV32:
  9074. * if (is `SMUL8`) {
  9075. * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
  9076. * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
  9077. * } else if (is `SMULX8`) {
  9078. * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
  9079. * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
  9080. * }
  9081. * rest[x/2] = op1t[x/2] s* op2t[x/2];
  9082. * resb[x/2] = op1b[x/2] s* op2b[x/2];
  9083. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  9084. * R[t_H].H[1] = rest[1]; R[t_H].H[0] = resb[1];
  9085. * R[t_L].H[1] = rest[0]; R[t_L].H[0] = resb[0];
  9086. * x = 0 and 2
  9087. * * RV64:
  9088. * if (is `SMUL8`) {
  9089. * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
  9090. * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
  9091. * } else if (is `SMULX8`) {
  9092. * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
  9093. * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
  9094. * }
  9095. * rest[x/2] = op1t[x/2] s* op2t[x/2];
  9096. * resb[x/2] = op1b[x/2] s* op2b[x/2];
  9097. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  9098. * Rd.W[1].H[1] = rest[1]; Rd.W[1].H[0] = resb[1];
  9099. * Rd.W[0].H[1] = rest[0]; Rd.W[0].H[0] = resb[0];
  9100. * x = 0 and 2
  9101. * ~~~
  9102. *
  9103. * \param [in] a unsigned int type of value stored in a
  9104. * \param [in] b unsigned int type of value stored in b
  9105. * \return value stored in unsigned long long type
  9106. */
  9107. __STATIC_FORCEINLINE unsigned long long __RV_SMUL8(unsigned int a, unsigned int b)
  9108. {
  9109. unsigned long long result;
  9110. __ASM volatile("smul8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  9111. return result;
  9112. }
  9113. /* ===== Inline Function End for 3.122.1. SMUL8 ===== */
  9114. /* ===== Inline Function Start for 3.122.2. SMULX8 ===== */
  9115. /**
  9116. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY
  9117. * \brief SMULX8 (SIMD Signed Crossed 8-bit Multiply)
  9118. * \details
  9119. * **Type**: SIMD
  9120. *
  9121. * **Syntax**:\n
  9122. * ~~~
  9123. * SMUL8 Rd, Rs1, Rs2
  9124. * SMULX8 Rd, Rs1, Rs2
  9125. * ~~~
  9126. *
  9127. * **Purpose**:\n
  9128. * Do signed 8-bit multiplications and generate four 16-bit results simultaneously.
  9129. *
  9130. * **RV32 Description**:\n
  9131. * For the `SMUL8` instruction, multiply the 8-bit data elements of Rs1 with the
  9132. * corresponding 8-bit data elements of Rs2.
  9133. * For the `SMULX8` instruction, multiply the first and second 8-bit data elements of Rs1 with the
  9134. * second and first 8-bit data elements of Rs2. At the same time, multiply the third and fourth 8-bit data
  9135. * elements of Rs1 with the fourth and third 8-bit data elements of Rs2.
  9136. * The four 16-bit results are then written into an even/odd pair of registers specified by Rd(4,1).
  9137. * Rd(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
  9138. * includes register 2d and 2d+1.
  9139. * The odd `2d+1` register of the pair contains the two 16-bit results calculated from the top part of
  9140. * Rs1 and the even `2d` register of the pair contains the two 16-bit results calculated from the bottom
  9141. * part of Rs1.
  9142. *
  9143. * **RV64 Description**:\n
  9144. * For the `SMUL8` instruction, multiply the 8-bit data elements of Rs1 with the
  9145. * corresponding 8-bit data elements of Rs2.
  9146. * For the `SMULX8` instruction, multiply the first and second 8-bit data elements of Rs1 with the
  9147. * second and first 8-bit data elements of Rs2. At the same time, multiply the third and fourth 8-bit data
  9148. * elements of Rs1 with the fourth and third 8-bit data elements of Rs2.
  9149. * The four 16-bit results are then written into Rd. The Rd.W[1] contains the two 16-bit results
  9150. * calculated from the top part of Rs1 and the Rd.W[0] contains the two 16-bit results calculated from
  9151. * the bottom part of Rs1.
  9152. *
  9153. * **Operations**:\n
  9154. * ~~~
  9155. * * RV32:
  9156. * if (is `SMUL8`) {
  9157. * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
  9158. * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
  9159. * } else if (is `SMULX8`) {
  9160. * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
  9161. * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
  9162. * }
  9163. * rest[x/2] = op1t[x/2] s* op2t[x/2];
  9164. * resb[x/2] = op1b[x/2] s* op2b[x/2];
  9165. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  9166. * R[t_H].H[1] = rest[1]; R[t_H].H[0] = resb[1];
  9167. * R[t_L].H[1] = rest[0]; R[t_L].H[0] = resb[0];
  9168. * x = 0 and 2
  9169. * * RV64:
  9170. * if (is `SMUL8`) {
  9171. * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
  9172. * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
  9173. * } else if (is `SMULX8`) {
  9174. * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
  9175. * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
  9176. * }
  9177. * rest[x/2] = op1t[x/2] s* op2t[x/2];
  9178. * resb[x/2] = op1b[x/2] s* op2b[x/2];
  9179. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  9180. * Rd.W[1].H[1] = rest[1]; Rd.W[1].H[0] = resb[1];
  9181. * Rd.W[0].H[1] = rest[0]; Rd.W[0].H[0] = resb[0];
  9182. * x = 0 and 2
  9183. * ~~~
  9184. *
  9185. * \param [in] a unsigned int type of value stored in a
  9186. * \param [in] b unsigned int type of value stored in b
  9187. * \return value stored in unsigned long long type
  9188. */
  9189. __STATIC_FORCEINLINE unsigned long long __RV_SMULX8(unsigned int a, unsigned int b)
  9190. {
  9191. unsigned long long products; /* Rd: the four packed 16-bit results */
  9192. __ASM volatile("smulx8 %0, %1, %2" : "=r"(products) : "r"(a), "r"(b)); /* %0 = Rd, %1 = Rs1 (a), %2 = Rs2 (b) */
  9193. return products;
  9194. }
  9195. /* ===== Inline Function End for 3.122.2. SMULX8 ===== */
  9196. /* ===== Inline Function Start for 3.123.1. SMUL16 ===== */
  9197. /**
  9198. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY
  9199. * \brief SMUL16 (SIMD Signed 16-bit Multiply)
  9200. * \details
  9201. * **Type**: SIMD
  9202. *
  9203. * **Syntax**:\n
  9204. * ~~~
  9205. * SMUL16 Rd, Rs1, Rs2
  9206. * SMULX16 Rd, Rs1, Rs2
  9207. * ~~~
  9208. *
  9209. * **Purpose**:\n
  9210. * Do signed 16-bit multiplications and generate two 32-bit results simultaneously.
  9211. *
  9212. * **RV32 Description**:\n
  9213. * For the `SMUL16` instruction, multiply the top 16-bit Q15 content of Rs1 with
  9214. * the top 16-bit Q15 content of Rs2. At the same time, multiply the bottom 16-bit Q15 content of Rs1
  9215. * with the bottom 16-bit Q15 content of Rs2.
  9216. * For the `SMULX16` instruction, multiply the top 16-bit Q15 content of Rs1 with the bottom 16-bit
  9217. * Q15 content of Rs2. At the same time, multiply the bottom 16-bit Q15 content of Rs1 with the top 16-
  9218. * bit Q15 content of Rs2.
  9219. * The two Q30 results are then written into an even/odd pair of registers specified by Rd(4,1). Rd(4,1),
  9220. * i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair includes
  9221. * register 2d and 2d+1.
  9222. * The odd `2d+1` register of the pair contains the 32-bit result calculated from the top part of Rs1 and
  9223. * the even `2d` register of the pair contains the 32-bit result calculated from the bottom part of Rs1.
  9224. *
  9225. * **RV64 Description**:\n
  9226. * For the `SMUL16` instruction, multiply the top 16-bit Q15 content of the lower
  9227. * 32-bit word in Rs1 with the top 16-bit Q15 content of the lower 32-bit word in Rs2. At the same time,
  9228. * multiply the bottom 16-bit Q15 content of the lower 32-bit word in Rs1 with the bottom 16-bit Q15
  9229. * content of the lower 32-bit word in Rs2.
  9230. * For the `SMULX16` instruction, multiply the top 16-bit Q15 content of the lower 32-bit word in Rs1
  9231. * with the bottom 16-bit Q15 content of the lower 32-bit word in Rs2. At the same time, multiply the
  9232. * bottom 16-bit Q15 content of the lower 32-bit word in Rs1 with the top 16-bit Q15 content of the
  9233. * lower 32-bit word in Rs2.
  9234. * The two 32-bit Q30 results are then written into Rd. The result calculated from the top 16-bit of the
  9235. * lower 32-bit word in Rs1 is written to Rd.W[1]. And the result calculated from the bottom 16-bit of
  9236. * the lower 32-bit word in Rs1 is written to Rd.W[0].
  9237. *
  9238. * **Operations**:\n
  9239. * ~~~
  9240. * * RV32:
  9241. * if (is `SMUL16`) {
  9242. * op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
  9243. * op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
  9244. * } else if (is `SMULX16`) {
  9245. * op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
  9246. * op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
  9247. * }
  9248. * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  9249. * res = aop s* bop;
  9250. * }
  9251. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  9252. * R[t_H] = rest;
  9253. * R[t_L] = resb;
  9254. * * RV64:
  9255. * if (is `SMUL16`) {
  9256. * op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
  9257. * op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
  9258. * } else if (is `SMULX16`) {
  9259. * op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
  9260. * op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
  9261. * }
  9262. * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  9263. * res = aop s* bop;
  9264. * }
  9265. * Rd.W[1] = rest;
  9266. * Rd.W[0] = resb;
  9267. * ~~~
  9268. *
  9269. * \param [in] a unsigned int type of value stored in a
  9270. * \param [in] b unsigned int type of value stored in b
  9271. * \return value stored in unsigned long long type
  9272. */
  9273. __STATIC_FORCEINLINE unsigned long long __RV_SMUL16(unsigned int a, unsigned int b)
  9274. {
  9275. unsigned long long products; /* Rd: the two packed 32-bit results */
  9276. __ASM volatile("smul16 %0, %1, %2" : "=r"(products) : "r"(a), "r"(b)); /* %0 = Rd, %1 = Rs1 (a), %2 = Rs2 (b) */
  9277. return products;
  9278. }
  9279. /* ===== Inline Function End for 3.123.1. SMUL16 ===== */
  9280. /* ===== Inline Function Start for 3.123.2. SMULX16 ===== */
  9281. /**
  9282. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY
  9283. * \brief SMULX16 (SIMD Signed Crossed 16-bit Multiply)
  9284. * \details
  9285. * **Type**: SIMD
  9286. *
  9287. * **Syntax**:\n
  9288. * ~~~
  9289. * SMUL16 Rd, Rs1, Rs2
  9290. * SMULX16 Rd, Rs1, Rs2
  9291. * ~~~
  9292. *
  9293. * **Purpose**:\n
  9294. * Do signed 16-bit multiplications and generate two 32-bit results simultaneously.
  9295. *
  9296. * **RV32 Description**:\n
  9297. * For the `SMUL16` instruction, multiply the top 16-bit Q15 content of Rs1 with
  9298. * the top 16-bit Q15 content of Rs2. At the same time, multiply the bottom 16-bit Q15 content of Rs1
  9299. * with the bottom 16-bit Q15 content of Rs2.
  9300. * For the `SMULX16` instruction, multiply the top 16-bit Q15 content of Rs1 with the bottom 16-bit
  9301. * Q15 content of Rs2. At the same time, multiply the bottom 16-bit Q15 content of Rs1 with the top 16-
  9302. * bit Q15 content of Rs2.
  9303. * The two Q30 results are then written into an even/odd pair of registers specified by Rd(4,1). Rd(4,1),
  9304. * i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair includes
  9305. * register 2d and 2d+1.
  9306. * The odd `2d+1` register of the pair contains the 32-bit result calculated from the top part of Rs1 and
  9307. * the even `2d` register of the pair contains the 32-bit result calculated from the bottom part of Rs1.
  9308. *
  9309. * **RV64 Description**:\n
  9310. * For the `SMUL16` instruction, multiply the top 16-bit Q15 content of the lower
  9311. * 32-bit word in Rs1 with the top 16-bit Q15 content of the lower 32-bit word in Rs2. At the same time,
  9312. * multiply the bottom 16-bit Q15 content of the lower 32-bit word in Rs1 with the bottom 16-bit Q15
  9313. * content of the lower 32-bit word in Rs2.
  9314. * For the `SMULX16` instruction, multiply the top 16-bit Q15 content of the lower 32-bit word in Rs1
  9315. * with the bottom 16-bit Q15 content of the lower 32-bit word in Rs2. At the same time, multiply the
  9316. * bottom 16-bit Q15 content of the lower 32-bit word in Rs1 with the top 16-bit Q15 content of the
  9317. * lower 32-bit word in Rs2.
  9318. * The two 32-bit Q30 results are then written into Rd. The result calculated from the top 16-bit of the
  9319. * lower 32-bit word in Rs1 is written to Rd.W[1]. And the result calculated from the bottom 16-bit of
  9320. * the lower 32-bit word in Rs1 is written to Rd.W[0].
  9321. *
  9322. * **Operations**:\n
  9323. * ~~~
  9324. * * RV32:
  9325. * if (is `SMUL16`) {
  9326. * op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
  9327. * op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
  9328. * } else if (is `SMULX16`) {
  9329. * op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
  9330. * op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
  9331. * }
  9332. * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  9333. * res = aop s* bop;
  9334. * }
  9335. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  9336. * R[t_H] = rest;
  9337. * R[t_L] = resb;
  9338. * * RV64:
  9339. * if (is `SMUL16`) {
  9340. * op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
  9341. * op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
  9342. * } else if (is `SMULX16`) {
  9343. * op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
  9344. * op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
  9345. * }
  9346. * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  9347. * res = aop s* bop;
  9348. * }
  9349. * Rd.W[1] = rest;
  9350. * Rd.W[0] = resb;
  9351. * ~~~
  9352. *
  9353. * \param [in] a unsigned int type of value stored in a
  9354. * \param [in] b unsigned int type of value stored in b
  9355. * \return value stored in unsigned long long type
  9356. */
  9357. __STATIC_FORCEINLINE unsigned long long __RV_SMULX16(unsigned int a, unsigned int b)
  9358. {
  9359. unsigned long long products; /* Rd: the two packed 32-bit results of the crossed multiply */
  9360. __ASM volatile("smulx16 %0, %1, %2" : "=r"(products) : "r"(a), "r"(b)); /* %0 = Rd, %1 = Rs1 (a), %2 = Rs2 (b) */
  9361. return products;
  9362. }
  9363. /* ===== Inline Function End for 3.123.2. SMULX16 ===== */
  9364. /* ===== Inline Function Start for 3.124. SRA.u ===== */
  9365. /**
  9366. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
  9367. * \brief SRA.u (Rounding Shift Right Arithmetic)
  9368. * \details
  9369. * **Type**: DSP
  9370. *
  9371. * **Syntax**:\n
  9372. * ~~~
  9373. * SRA.u Rd, Rs1, Rs2
  9374. * ~~~
  9375. *
  9376. * **Purpose**:\n
  9377. * Perform an arithmetic right shift operation with rounding. The shift amount is a variable
  9378. * from a GPR.
  9379. *
  9380. * **Description**:\n
  9381. * This instruction right-shifts the content of Rs1 arithmetically. The shifted out bits are
  9382. * filled with the sign-bit and the shift amount is specified by the low-order 5-bits (RV32) or 6-bits
  9383. * (RV64) of the Rs2 register. For the rounding operation, a value of 1 is added to the most significant
  9384. * discarded bit of the data to calculate the final result. And the result is written to Rd.
  9385. *
  9386. * **Operations**:\n
  9387. * ~~~
  9388. * * RV32:
  9389. * sa = Rs2[4:0];
  9390. * if (sa > 0) {
  9391. * res[31:-1] = SE33(Rs1[31:(sa-1)]) + 1;
  9392. * Rd = res[31:0];
  9393. * } else {
  9394. * Rd = Rs1;
  9395. * }
  9396. * * RV64:
  9397. * sa = Rs2[5:0];
  9398. * if (sa > 0) {
  9399. * res[63:-1] = SE65(Rs1[63:(sa-1)]) + 1;
  9400. * Rd = res[63:0];
  9401. * } else {
  9402. * Rd = Rs1;
  9403. * }
  9404. * ~~~
  9405. *
  9406. * \param [in] a long type of value stored in a
  9407. * \param [in] b unsigned int type of value stored in b
  9408. * \return value stored in long type
  9409. */
  9410. __STATIC_FORCEINLINE long __RV_SRA_U(long a, unsigned int b)
  9411. {
  9412. long rounded; /* Rd: a shifted right arithmetically with rounding */
  9413. __ASM volatile("sra.u %0, %1, %2" : "=r"(rounded) : "r"(a), "r"(b)); /* %0 = Rd, %1 = Rs1 (a), %2 = Rs2 (shift amount b) */
  9414. return rounded;
  9415. }
  9416. /* ===== Inline Function End for 3.124. SRA.u ===== */
  9417. /* ===== Inline Function Start for 3.125. SRAI.u ===== */
  9418. /**
  9419. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
  9420. * \brief SRAI.u (Rounding Shift Right Arithmetic Immediate)
  9421. * \details
  9422. * **Type**: DSP
  9423. *
  9424. * **Syntax**:\n
  9425. * ~~~
  9426. * SRAI.u Rd, Rs1, imm6u[4:0] (RV32)
  9427. * SRAI.u Rd, Rs1, imm6u[5:0] (RV64)
  9428. * ~~~
  9429. *
  9430. * **Purpose**:\n
  9431. * Perform an arithmetic right shift operation with rounding. The shift amount is an
  9432. * immediate value.
  9433. *
  9434. * **Description**:\n
  9435. * This instruction right-shifts the content of Rs1 arithmetically. The shifted out bits are
  9436. * filled with the sign-bit and the shift amount is specified by the imm6u[4:0] (RV32) or imm6u[5:0]
  9437. * (RV64) constant. For the rounding operation, a value of 1 is added to the most significant discarded
  9438. * bit of the data to calculate the final result. And the result is written to Rd.
  9439. *
  9440. * **Operations**:\n
  9441. * ~~~
  9442. * * RV32:
  9443. * sa = imm6u[4:0];
  9444. * if (sa > 0) {
  9445. * res[31:-1] = SE33(Rs1[31:(sa-1)]) + 1;
  9446. * Rd = res[31:0];
  9447. * } else {
  9448. * Rd = Rs1;
  9449. * }
  9450. * * RV64:
  9451. * sa = imm6u[5:0];
  9452. * if (sa > 0) {
  9453. * res[63:-1] = SE65(Rs1[63:(sa-1)]) + 1;
  9454. * Rd = res[63:0];
  9455. * } else {
  9456. * Rd = Rs1;
  9457. * }
  9458. * ~~~
  9459. *
  9460. * \param [in] a long type of value stored in a
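  9461. * \param [in] b shift amount; emitted as the instruction's immediate operand (imm6u[4:0] on RV32, imm6u[5:0] on RV64), so it must be a compile-time constant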
  9462. * \return value stored in long type
  9463. */
  9464. #define __RV_SRAI_U(a, b) \
  9465. ({ \
  9466. long __result; /* underscore-prefixed so an argument named `result` is not shadowed */ \
  9467. long __a = (long)(a); /* evaluate (a) exactly once; becomes the Rs1 operand */ \
  9468. __ASM volatile("srai.u %0, %1, %2" : "=r"(__result) : "r"(__a), "K"(b)); \
  9469. __result; /* value of the statement expression */ \
  9470. })
  9471. /* ===== Inline Function End for 3.125. SRAI.u ===== */
  9472. /* ===== Inline Function Start for 3.126.1. SRA8 ===== */
  9473. /**
  9474. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
  9475. * \brief SRA8 (SIMD 8-bit Shift Right Arithmetic)
  9476. * \details
  9477. * **Type**: SIMD
  9478. *
  9479. * **Syntax**:\n
  9480. * ~~~
  9481. * SRA8 Rd, Rs1, Rs2
  9482. * SRA8.u Rd, Rs1, Rs2
  9483. * ~~~
  9484. *
  9485. * **Purpose**:\n
  9486. * Do 8-bit element arithmetic right shift operations simultaneously. The shift amount is a
  9487. * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
  9488. * results.
  9489. *
  9490. * **Description**:\n
  9491. * The 8-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
  9492. * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order
  9493. * 3-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is
  9494. * added to the most significant discarded bit of each 8-bit data element to calculate the final results.
  9495. * And the results are written to Rd.
  9496. *
  9497. * **Operations**:\n
  9498. * ~~~
  9499. * sa = Rs2[2:0];
  9500. * if (sa > 0) {
  9501. * if (`.u` form) { // SRA8.u
  9502. * res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1;
  9503. * Rd.B[x] = res[7:0];
  9504. * } else { // SRA8
  9505. * Rd.B[x] = SE8(Rs1.B[x][7:sa])
  9506. * }
  9507. * } else {
  9508. * Rd = Rs1;
  9509. * }
  9510. * for RV32: x=3...0,
  9511. * for RV64: x=7...0
  9512. * ~~~
  9513. *
  9514. * \param [in] a unsigned long type of value stored in a
  9515. * \param [in] b unsigned int type of value stored in b
  9516. * \return value stored in unsigned long type
  9517. */
  9518. __STATIC_FORCEINLINE unsigned long __RV_SRA8(unsigned long a, unsigned int b)
  9519. {
  9520. unsigned long shifted; /* Rd: the 8-bit elements of a after the arithmetic right shift */
  9521. __ASM volatile("sra8 %0, %1, %2" : "=r"(shifted) : "r"(a), "r"(b)); /* %0 = Rd, %1 = Rs1 (a), %2 = Rs2 (shift amount b) */
  9522. return shifted;
  9523. }
  9524. /* ===== Inline Function End for 3.126.1. SRA8 ===== */
  9525. /* ===== Inline Function Start for 3.126.2. SRA8.u ===== */
  9526. /**
  9527. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
  9528. * \brief SRA8.u (SIMD 8-bit Rounding Shift Right Arithmetic)
  9529. * \details
  9530. * **Type**: SIMD
  9531. *
  9532. * **Syntax**:\n
  9533. * ~~~
  9534. * SRA8 Rd, Rs1, Rs2
  9535. * SRA8.u Rd, Rs1, Rs2
  9536. * ~~~
  9537. *
  9538. * **Purpose**:\n
  9539. * Do 8-bit element arithmetic right shift operations simultaneously. The shift amount is a
  9540. * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
  9541. * results.
  9542. *
  9543. * **Description**:\n
  9544. * The 8-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
  9545. * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order
  9546. * 3-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is
  9547. * added to the most significant discarded bit of each 8-bit data element to calculate the final results.
  9548. * And the results are written to Rd.
  9549. *
  9550. * **Operations**:\n
  9551. * ~~~
  9552. * sa = Rs2[2:0];
  9553. * if (sa > 0) {
  9554. * if (`.u` form) { // SRA8.u
  9555. * res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1;
  9556. * Rd.B[x] = res[7:0];
  9557. * } else { // SRA8
  9558. * Rd.B[x] = SE8(Rs1.B[x][7:sa])
  9559. * }
  9560. * } else {
  9561. * Rd = Rs1;
  9562. * }
  9563. * for RV32: x=3...0,
  9564. * for RV64: x=7...0
  9565. * ~~~
  9566. *
  9567. * \param [in] a unsigned long type of value stored in a
  9568. * \param [in] b unsigned int type of value stored in b
  9569. * \return value stored in unsigned long type
  9570. */
  9571. __STATIC_FORCEINLINE unsigned long __RV_SRA8_U(unsigned long a, unsigned int b)
  9572. {
  9573. unsigned long rounded; /* Rd: the 8-bit elements of a after the rounding arithmetic right shift */
  9574. __ASM volatile("sra8.u %0, %1, %2" : "=r"(rounded) : "r"(a), "r"(b)); /* %0 = Rd, %1 = Rs1 (a), %2 = Rs2 (shift amount b) */
  9575. return rounded;
  9576. }
  9577. /* ===== Inline Function End for 3.126.2. SRA8.u ===== */
  9578. /* ===== Inline Function Start for 3.127.1. SRAI8 ===== */
  9579. /**
  9580. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
  9581. * \brief SRAI8 (SIMD 8-bit Shift Right Arithmetic Immediate)
  9582. * \details
  9583. * **Type**: SIMD
  9584. *
  9585. * **Syntax**:\n
  9586. * ~~~
  9587. * SRAI8 Rd, Rs1, imm3u
  9588. * SRAI8.u Rd, Rs1, imm3u
  9589. * ~~~
  9590. *
  9591. * **Purpose**:\n
  9592. * Do 8-bit element arithmetic right shift operations simultaneously. The shift amount is an
  9593. * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
  9594. *
  9595. * **Description**:\n
  9596. * The 8-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
  9597. * bits are filled with the sign-bit of the data elements. The shift amount is specified by the imm3u
  9598. * constant. For the rounding operation of the `.u` form, a value of 1 is added to the most significant
  9599. * discarded bit of each 8-bit data element to calculate the final results. And the results are written to
  9600. * Rd.
  9601. *
  9602. * **Operations**:\n
  9603. * ~~~
  9604. * sa = imm3u[2:0];
  9605. * if (sa > 0) {
  9606. * if (`.u` form) { // SRAI8.u
  9607. * res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1;
  9608. * Rd.B[x] = res[7:0];
  9609. * } else { // SRAI8
  9610. * Rd.B[x] = SE8(Rs1.B[x][7:sa])
  9611. * }
  9612. * } else {
  9613. * Rd = Rs1;
  9614. * }
  9615. * for RV32: x=3...0,
  9616. * for RV64: x=7...0
  9617. * ~~~
  9618. *
  9619. * \param [in] a unsigned long type of value stored in a
  9620. * \param [in] b shift amount; emitted as the instruction's immediate operand (imm3u), so it must be a compile-time constant
  9621. * \return value stored in unsigned long type
  9622. */
  9623. #define __RV_SRAI8(a, b) \
  9624. ({ \
  9625. unsigned long __result; /* underscore-prefixed so an argument named `result` is not shadowed */ \
  9626. unsigned long __a = (unsigned long)(a); /* evaluate (a) exactly once; becomes the Rs1 operand */ \
  9627. __ASM volatile("srai8 %0, %1, %2" : "=r"(__result) : "r"(__a), "K"(b)); \
  9628. __result; /* value of the statement expression */ \
  9629. })
  9630. /* ===== Inline Function End for 3.127.1. SRAI8 ===== */
  9631. /* ===== Inline Function Start for 3.127.2. SRAI8.u ===== */
  9632. /**
  9633. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
  9634. * \brief SRAI8.u (SIMD 8-bit Rounding Shift Right Arithmetic Immediate)
  9635. * \details
  9636. * **Type**: SIMD
  9637. *
  9638. * **Syntax**:\n
  9639. * ~~~
  9640. * SRAI8 Rd, Rs1, imm3u
  9641. * SRAI8.u Rd, Rs1, imm3u
  9642. * ~~~
  9643. *
  9644. * **Purpose**:\n
  9645. * Do 8-bit element arithmetic right shift operations simultaneously. The shift amount is an
  9646. * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
  9647. *
  9648. * **Description**:\n
  9649. * The 8-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
  9650. * bits are filled with the sign-bit of the data elements. The shift amount is specified by the imm3u
  9651. * constant. For the rounding operation of the `.u` form, a value of 1 is added to the most significant
  9652. * discarded bit of each 8-bit data element to calculate the final results. And the results are written to
  9653. * Rd.
  9654. *
  9655. * **Operations**:\n
  9656. * ~~~
  9657. * sa = imm3u[2:0];
  9658. * if (sa > 0) {
  9659. * if (`.u` form) { // SRAI8.u
  9660. * res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1;
  9661. * Rd.B[x] = res[7:0];
  9662. * } else { // SRAI8
  9663. * Rd.B[x] = SE8(Rs1.B[x][7:sa])
  9664. * }
  9665. * } else {
  9666. * Rd = Rs1;
  9667. * }
  9668. * for RV32: x=3...0,
  9669. * for RV64: x=7...0
  9670. * ~~~
  9671. *
  9672. * \param [in] a unsigned long type of value stored in a
  9673. * \param [in] b shift amount; emitted as the instruction's immediate operand (imm3u), so it must be a compile-time constant
  9674. * \return value stored in unsigned long type
  9675. */
  9676. #define __RV_SRAI8_U(a, b) \
  9677. ({ \
  9678. unsigned long __result; /* underscore-prefixed so an argument named `result` is not shadowed */ \
  9679. unsigned long __a = (unsigned long)(a); /* evaluate (a) exactly once; becomes the Rs1 operand */ \
  9680. __ASM volatile("srai8.u %0, %1, %2" : "=r"(__result) : "r"(__a), "K"(b)); \
  9681. __result; /* value of the statement expression */ \
  9682. })
  9683. /* ===== Inline Function End for 3.127.2. SRAI8.u ===== */
  9684. /* ===== Inline Function Start for 3.128.1. SRA16 ===== */
  9685. /**
  9686. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
  9687. * \brief SRA16 (SIMD 16-bit Shift Right Arithmetic)
  9688. * \details
  9689. * **Type**: SIMD
  9690. *
  9691. * **Syntax**:\n
  9692. * ~~~
  9693. * SRA16 Rd, Rs1, Rs2
  9694. * SRA16.u Rd, Rs1, Rs2
  9695. * ~~~
  9696. *
  9697. * **Purpose**:\n
  9698. * Do 16-bit element arithmetic right shift operations simultaneously. The shift amount is a
  9699. * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
  9700. * results.
  9701. *
  9702. * **Description**:\n
  9703. * The 16-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
  9704. * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order
  9705. * 4-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is
  9706. * added to the most significant discarded bit of each 16-bit data element to calculate the final results.
  9707. * And the results are written to Rd.
  9708. *
  9709. * **Operations**:\n
  9710. * ~~~
  9711. * sa = Rs2[3:0];
  9712. * if (sa != 0) {
  9713. * if (`.u` form) { // SRA16.u
  9714. * res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1;
  9715. * Rd.H[x] = res[15:0];
  9716. * } else { // SRA16
  9717. * Rd.H[x] = SE16(Rs1.H[x][15:sa])
  9718. * }
  9719. * } else {
  9720. * Rd = Rs1;
  9721. * }
  9722. * for RV32: x=1...0,
  9723. * for RV64: x=3...0
  9724. * ~~~
  9725. *
  9726. * \param [in] a unsigned long type of value stored in a
  9727. * \param [in] b unsigned long type of value stored in b
  9728. * \return value stored in unsigned long type
  9729. */
  9730. __STATIC_FORCEINLINE unsigned long __RV_SRA16(unsigned long a, unsigned long b)
  9731. {
  9732. unsigned long shifted; /* Rd: the 16-bit elements of a after the arithmetic right shift */
  9733. __ASM volatile("sra16 %0, %1, %2" : "=r"(shifted) : "r"(a), "r"(b)); /* %0 = Rd, %1 = Rs1 (a), %2 = Rs2 (shift amount b) */
  9734. return shifted;
  9735. }
  9736. /* ===== Inline Function End for 3.128.1. SRA16 ===== */
  9737. /* ===== Inline Function Start for 3.128.2. SRA16.u ===== */
  9738. /**
  9739. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
  9740. * \brief SRA16.u (SIMD 16-bit Rounding Shift Right Arithmetic)
  9741. * \details
  9742. * **Type**: SIMD
  9743. *
  9744. * **Syntax**:\n
  9745. * ~~~
  9746. * SRA16 Rd, Rs1, Rs2
  9747. * SRA16.u Rd, Rs1, Rs2
  9748. * ~~~
  9749. *
  9750. * **Purpose**:\n
  9751. * Do 16-bit element arithmetic right shift operations simultaneously. The shift amount is a
  9752. * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
  9753. * results.
  9754. *
  9755. * **Description**:\n
  9756. * The 16-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
  9757. * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order
  9758. * 4-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is
  9759. * added to the most significant discarded bit of each 16-bit data element to calculate the final results.
  9760. * And the results are written to Rd.
  9761. *
  9762. * **Operations**:\n
  9763. * ~~~
  9764. * sa = Rs2[3:0];
  9765. * if (sa != 0) {
  9766. * if (`.u` form) { // SRA16.u
  9767. * res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1;
  9768. * Rd.H[x] = res[15:0];
  9769. * } else { // SRA16
  9770. * Rd.H[x] = SE16(Rs1.H[x][15:sa])
  9771. * }
  9772. * } else {
  9773. * Rd = Rs1;
  9774. * }
  9775. * for RV32: x=1...0,
  9776. * for RV64: x=3...0
  9777. * ~~~
  9778. *
  9779. * \param [in] a unsigned long type of value stored in a
  9780. * \param [in] b unsigned long type of value stored in b
  9781. * \return value stored in unsigned long type
  9782. */
  9783. __STATIC_FORCEINLINE unsigned long __RV_SRA16_U(unsigned long a, unsigned long b)
  9784. {
  9785. unsigned long rounded; /* Rd: the 16-bit elements of a after the rounding arithmetic right shift */
  9786. __ASM volatile("sra16.u %0, %1, %2" : "=r"(rounded) : "r"(a), "r"(b)); /* %0 = Rd, %1 = Rs1 (a), %2 = Rs2 (shift amount b) */
  9787. return rounded;
  9788. }
  9789. /* ===== Inline Function End for 3.128.2. SRA16.u ===== */
  9790. /* ===== Inline Function Start for 3.129.1. SRAI16 ===== */
  9791. /**
  9792. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
  9793. * \brief SRAI16 (SIMD 16-bit Shift Right Arithmetic Immediate)
  9794. * \details
  9795. * **Type**: SIMD
  9796. *
  9797. * **Syntax**:\n
  9798. * ~~~
  9799. * SRAI16 Rd, Rs1, imm4u
  9800. * SRAI16.u Rd, Rs1, imm4u
  9801. * ~~~
  9802. *
  9803. * **Purpose**:\n
  9804. * Do 16-bit elements arithmetic right shift operations simultaneously. The shift amount is
  9805. * an immediate value. The `.u` form performs additional rounding up operations on the shifted
  9806. * results.
  9807. *
  9808. * **Description**:\n
  9809. * The 16-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
  9810. * bits are filled with the sign-bit of the 16-bit data elements. The shift amount is specified by the
  9811. * imm4u constant. For the rounding operation of the `.u` form, a value of 1 is added to the most
  9812. * significant discarded bit of each 16-bit data to calculate the final results. And the results are written
  9813. * to Rd.
  9814. *
  9815. * **Operations**:\n
  9816. * ~~~
  9817. * sa = imm4u[3:0];
  9818. * if (sa > 0) {
  9819. * if (`.u` form) { // SRAI16.u
  9820. * res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1;
  9821. * Rd.H[x] = res[15:0];
  9822. * } else { // SRAI16
  9823. * Rd.H[x] = SE16(Rs1.H[x][15:sa]);
  9824. * }
  9825. * } else {
  9826. * Rd = Rs1;
  9827. * }
  9828. * for RV32: x=1...0,
  9829. * for RV64: x=3...0
  9830. * ~~~
  9831. *
  9832. * \param [in] a unsigned long type of value stored in a
  9833. * \param [in] b shift amount; emitted as the instruction's immediate operand (imm4u), so it must be a compile-time constant
  9834. * \return value stored in unsigned long type
  9835. */
  9836. #define __RV_SRAI16(a, b) \
  9837. ({ \
  9838. unsigned long __result; /* underscore-prefixed so an argument named `result` is not shadowed */ \
  9839. unsigned long __a = (unsigned long)(a); /* evaluate (a) exactly once; becomes the Rs1 operand */ \
  9840. __ASM volatile("srai16 %0, %1, %2" : "=r"(__result) : "r"(__a), "K"(b)); \
  9841. __result; /* value of the statement expression */ \
  9842. })
  9843. /* ===== Inline Function End for 3.129.1. SRAI16 ===== */
  9844. /* ===== Inline Function Start for 3.129.2. SRAI16.u ===== */
  9845. /**
  9846. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
  9847. * \brief SRAI16.u (SIMD 16-bit Rounding Shift Right Arithmetic Immediate)
  9848. * \details
  9849. * **Type**: SIMD
  9850. *
  9851. * **Syntax**:\n
  9852. * ~~~
  9853. * SRAI16 Rd, Rs1, imm4u
  9854. * SRAI16.u Rd, Rs1, imm4u
  9855. * ~~~
  9856. *
  9857. * **Purpose**:\n
  9858. * Do 16-bit elements arithmetic right shift operations simultaneously. The shift amount is
  9859. * an immediate value. The `.u` form performs additional rounding up operations on the shifted
  9860. * results.
  9861. *
  9862. * **Description**:\n
  9863. * The 16-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
  9864. * bits are filled with the sign-bit of the 16-bit data elements. The shift amount is specified by the
  9865. * imm4u constant. For the rounding operation of the `.u` form, a value of 1 is added to the most
  9866. * significant discarded bit of each 16-bit data to calculate the final results. And the results are written
  9867. * to Rd.
  9868. *
  9869. * **Operations**:\n
  9870. * ~~~
  9871. * sa = imm4u[3:0];
  9872. * if (sa > 0) {
  9873. * if (`.u` form) { // SRAI16.u
  9874. * res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1;
  9875. * Rd.H[x] = res[15:0];
  9876. * } else { // SRAI16
  9877. * Rd.H[x] = SE16(Rs1.H[x][15:sa]);
  9878. * }
  9879. * } else {
  9880. * Rd = Rs1;
  9881. * }
  9882. * for RV32: x=1...0,
  9883. * for RV64: x=3...0
  9884. * ~~~
  9885. *
  9886. * \param [in] a unsigned long type of value stored in a
  9887. * \param [in] b shift amount; emitted as the instruction's immediate operand (imm4u), so it must be a compile-time constant
  9888. * \return value stored in unsigned long type
  9889. */
  9890. #define __RV_SRAI16_U(a, b) \
  9891. ({ \
  9892. unsigned long __result; /* underscore-prefixed so an argument named `result` is not shadowed */ \
  9893. unsigned long __a = (unsigned long)(a); /* evaluate (a) exactly once; becomes the Rs1 operand */ \
  9894. __ASM volatile("srai16.u %0, %1, %2" : "=r"(__result) : "r"(__a), "K"(b)); \
  9895. __result; /* value of the statement expression */ \
  9896. })
  9897. /* ===== Inline Function End for 3.129.2. SRAI16.u ===== */
  9898. /* ===== Inline Function Start for 3.130.1. SRL8 ===== */
  9899. /**
  9900. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
  9901. * \brief SRL8 (SIMD 8-bit Shift Right Logical)
  9902. * \details
  9903. * **Type**: SIMD
  9904. *
  9905. * **Syntax**:\n
  9906. * ~~~
  9907. * SRL8 Rd, Rs1, Rs2
  9908. * SRL8.u Rd, Rs1, Rs2
  9909. * ~~~
  9910. *
  9911. * **Purpose**:\n
  9912. * Do 8-bit elements logical right shift operations simultaneously. The shift amount is a
  9913. * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
  9914. * results.
  9915. *
  9916. * **Description**:\n
  9917. * The 8-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits are
  9918. * filled with zero. The shift amount is specified by the low-order 3-bits of the value in the Rs2 register.
  9919. * For the rounding operation of the `.u` form, a value of 1 is added to the most significant discarded
  9920. * bit of each 8-bit data element to calculate the final results. And the results are written to Rd.
  9921. *
  9922. * **Operations**:\n
  9923. * ~~~
  9924. * sa = Rs2[2:0];
  9925. * if (sa > 0) {
  9926. * if (`.u` form) { // SRL8.u
  9927. * res[8:0] = ZE9(Rs1.B[x][7:sa-1]) + 1;
  9928. * Rd.B[x] = res[8:1];
  9929. * } else { // SRL8
  9930. * Rd.B[x] = ZE8(Rs1.B[x][7:sa]);
  9931. * }
  9932. * } else {
  9933. * Rd = Rs1;
  9934. * }
  9935. * for RV32: x=3...0,
  9936. * for RV64: x=7...0
  9937. * ~~~
  9938. *
  9939. * \param [in] a unsigned long type of value stored in a
  9940. * \param [in] b unsigned int type of value stored in b
  9941. * \return value stored in unsigned long type
  9942. */
  9943. __STATIC_FORCEINLINE unsigned long __RV_SRL8(unsigned long a, unsigned int b)
  9944. {
  9945. unsigned long result;
  9946. __ASM volatile("srl8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  9947. return result;
  9948. }
  9949. /* ===== Inline Function End for 3.130.1. SRL8 ===== */
  9950. /* ===== Inline Function Start for 3.130.2. SRL8.u ===== */
  9951. /**
  9952. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
  9953. * \brief SRL8.u (SIMD 8-bit Rounding Shift Right Logical)
  9954. * \details
  9955. * **Type**: SIMD
  9956. *
  9957. * **Syntax**:\n
  9958. * ~~~
  9959. * SRL8 Rt, Ra, Rb
  9960. * SRL8.u Rt, Ra, Rb
  9961. * ~~~
  9962. *
  9963. * **Purpose**:\n
  9964. * Do 8-bit elements logical right shift operations simultaneously. The shift amount is a
  9965. * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
  9966. * results.
  9967. *
  9968. * **Description**:\n
  9969. * The 8-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits are
  9970. * filled with zero. The shift amount is specified by the low-order 3-bits of the value in the Rs2 register.
  9971. * For the rounding operation of the `.u` form, a value of 1 is added to the most significant discarded
  9972. * bit of each 8-bit data element to calculate the final results. And the results are written to Rd.
  9973. *
  9974. * **Operations**:\n
  9975. * ~~~
  9976. * sa = Rs2[2:0];
  9977. * if (sa > 0) {
  9978. * if (`.u` form) { // SRL8.u
  9979. * res[8:0] = ZE9(Rs1.B[x][7:sa-1]) + 1;
  9980. * Rd.B[x] = res[8:1];
  9981. * } else { // SRL8
  9982. * Rd.B[x] = ZE8(Rs1.B[x][7:sa]);
  9983. * }
  9984. * } else {
  9985. * Rd = Rs1;
  9986. * }
  9987. * for RV32: x=3...0,
  9988. * for RV64: x=7...0
  9989. * ~~~
  9990. *
  9991. * \param [in] a unsigned long type of value stored in a
  9992. * \param [in] b unsigned int type of value stored in b
  9993. * \return value stored in unsigned long type
  9994. */
  9995. __STATIC_FORCEINLINE unsigned long __RV_SRL8_U(unsigned long a, unsigned int b)
  9996. {
  9997. unsigned long result;
  9998. __ASM volatile("srl8.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  9999. return result;
  10000. }
  10001. /* ===== Inline Function End for 3.130.2. SRL8.u ===== */
  10002. /* ===== Inline Function Start for 3.131.1. SRLI8 ===== */
  10003. /**
  10004. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
  10005. * \brief SRLI8 (SIMD 8-bit Shift Right Logical Immediate)
  10006. * \details
  10007. * **Type**: SIMD
  10008. *
  10009. * **Syntax**:\n
  10010. * ~~~
  10011. * SRLI8 Rt, Ra, imm3u
  10012. * SRLI8.u Rt, Ra, imm3u
  10013. * ~~~
  10014. *
  10015. * **Purpose**:\n
  10016. * Do 8-bit elements logical right shift operations simultaneously. The shift amount is an
  10017. * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
  10018. *
  10019. * **Description**:\n
  10020. * The 8-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits are
  10021. * filled with zero. The shift amount is specified by the imm3u constant. For the rounding operation of
  10022. * the `.u` form, a value of 1 is added to the most significant discarded bit of each 8-bit data element to
  10023. * calculate the final results. And the results are written to Rd.
  10024. *
  10025. * **Operations**:\n
  10026. * ~~~
  10027. * sa = imm3u[2:0];
  10028. * if (sa > 0) {
  10029. * if (`.u` form) { // SRLI8.u
  10030. * res[8:0] = ZE9(Rs1.B[x][7:sa-1]) + 1;
  10031. * Rd.B[x] = res[8:1];
  10032. * } else { // SRLI8
  10033. * Rd.B[x] = ZE8(Rs1.B[x][7:sa]);
  10034. * }
  10035. * } else {
  10036. * Rd = Rs1;
  10037. * }
  10038. * for RV32: x=3...0,
  10039. * for RV64: x=7...0
  10040. * ~~~
  10041. *
  10042. * \param [in] a unsigned long type of value stored in a
  10043. * \param [in] b unsigned int type of value stored in b
  10044. * \return value stored in unsigned long type
  10045. */
  #define __RV_SRLI8(a, b) \
  ({ \
      /* Evaluate the operand first: the previous expansion declared a local */ \
      /* called `result` ahead of it, which shadowed any caller variable of */ \
      /* that name used inside the argument expression. */ \
      unsigned long __rv_srli8_src = (unsigned long)(a); \
      unsigned long __rv_srli8_dst; \
      /* `b` is bound with the "K" constraint, so it is emitted as an immediate. */ \
      __ASM volatile("srli8 %0, %1, %2" : "=r"(__rv_srli8_dst) : "r"(__rv_srli8_src), "K"(b)); \
      __rv_srli8_dst; \
  })
  10053. /* ===== Inline Function End for 3.131.1. SRLI8 ===== */
  10054. /* ===== Inline Function Start for 3.131.2. SRLI8.u ===== */
  10055. /**
  10056. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
  10057. * \brief SRLI8.u (SIMD 8-bit Rounding Shift Right Logical Immediate)
  10058. * \details
  10059. * **Type**: SIMD
  10060. *
  10061. * **Syntax**:\n
  10062. * ~~~
  10063. * SRLI8 Rt, Ra, imm3u
  10064. * SRLI8.u Rt, Ra, imm3u
  10065. * ~~~
  10066. *
  10067. * **Purpose**:\n
  10068. * Do 8-bit elements logical right shift operations simultaneously. The shift amount is an
  10069. * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
  10070. *
  10071. * **Description**:\n
  10072. * The 8-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits are
  10073. * filled with zero. The shift amount is specified by the imm3u constant. For the rounding operation of
  10074. * the `.u` form, a value of 1 is added to the most significant discarded bit of each 8-bit data element to
  10075. * calculate the final results. And the results are written to Rd.
  10076. *
  10077. * **Operations**:\n
  10078. * ~~~
  10079. * sa = imm3u[2:0];
  10080. * if (sa > 0) {
  10081. * if (`.u` form) { // SRLI8.u
  10082. * res[8:0] = ZE9(Rs1.B[x][7:sa-1]) + 1;
  10083. * Rd.B[x] = res[8:1];
  10084. * } else { // SRLI8
  10085. * Rd.B[x] = ZE8(Rs1.B[x][7:sa]);
  10086. * }
  10087. * } else {
  10088. * Rd = Rs1;
  10089. * }
  10090. * for RV32: x=3...0,
  10091. * for RV64: x=7...0
  10092. * ~~~
  10093. *
  10094. * \param [in] a unsigned long type of value stored in a
  10095. * \param [in] b unsigned int type of value stored in b
  10096. * \return value stored in unsigned long type
  10097. */
  #define __RV_SRLI8_U(a, b) \
  ({ \
      /* Evaluate the operand first: the previous expansion declared a local */ \
      /* called `result` ahead of it, which shadowed any caller variable of */ \
      /* that name used inside the argument expression. */ \
      unsigned long __rv_srli8u_src = (unsigned long)(a); \
      unsigned long __rv_srli8u_dst; \
      /* `b` is bound with the "K" constraint, so it is emitted as an immediate. */ \
      __ASM volatile("srli8.u %0, %1, %2" : "=r"(__rv_srli8u_dst) : "r"(__rv_srli8u_src), "K"(b)); \
      __rv_srli8u_dst; \
  })
  10105. /* ===== Inline Function End for 3.131.2. SRLI8.u ===== */
  10106. /* ===== Inline Function Start for 3.132.1. SRL16 ===== */
  10107. /**
  10108. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
  10109. * \brief SRL16 (SIMD 16-bit Shift Right Logical)
  10110. * \details
  10111. * **Type**: SIMD
  10112. *
  10113. * **Syntax**:\n
  10114. * ~~~
  10115. * SRL16 Rt, Ra, Rb
  10116. * SRL16.u Rt, Ra, Rb
  10117. * ~~~
  10118. *
  10119. * **Purpose**:\n
  * Do 16-bit elements logical right shift operations simultaneously. The shift amount is a variable from a GPR. The `.u` form performs additional rounding up operations on the shifted results.
  10121. *
  10122. * **Description**:\n
  10123. * The 16-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
  10124. * are filled with zero. The shift amount is specified by the low-order 4-bits of the value in the Rs2
  10125. * register. For the rounding operation of the `.u` form, a value of 1 is added to the most significant
  10126. * discarded bit of each 16-bit data element to calculate the final results. And the results are written to
  10127. * Rd.
  10128. *
  10129. * **Operations**:\n
  10130. * ~~~
  10131. * sa = Rs2[3:0];
  10132. * if (sa > 0) {
  10133. * if (`.u` form) { // SRL16.u
  10134. * res[16:0] = ZE17(Rs1.H[x][15:sa-1]) + 1;
  10135. * Rd.H[x] = res[16:1];
  10136. * } else { // SRL16
  10137. * Rd.H[x] = ZE16(Rs1.H[x][15:sa]);
  10138. * }
  10139. * } else {
  10140. * Rd = Rs1;
  10141. * }
  10142. * for RV32: x=1...0,
  10143. * for RV64: x=3...0
  10144. * ~~~
  10145. *
  10146. * \param [in] a unsigned long type of value stored in a
  10147. * \param [in] b unsigned int type of value stored in b
  10148. * \return value stored in unsigned long type
  10149. */
  10150. __STATIC_FORCEINLINE unsigned long __RV_SRL16(unsigned long a, unsigned int b)
  10151. {
  10152. unsigned long result;
  10153. __ASM volatile("srl16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  10154. return result;
  10155. }
  10156. /* ===== Inline Function End for 3.132.1. SRL16 ===== */
  10157. /* ===== Inline Function Start for 3.132.2. SRL16.u ===== */
  10158. /**
  10159. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
  10160. * \brief SRL16.u (SIMD 16-bit Rounding Shift Right Logical)
  10161. * \details
  10162. * **Type**: SIMD
  10163. *
  10164. * **Syntax**:\n
  10165. * ~~~
  10166. * SRL16 Rt, Ra, Rb
  10167. * SRL16.u Rt, Ra, Rb
  10168. * ~~~
  10169. *
  10170. * **Purpose**:\n
  * Do 16-bit elements logical right shift operations simultaneously. The shift amount is a variable from a GPR. The `.u` form performs additional rounding up operations on the shifted results.
  10172. *
  10173. * **Description**:\n
  10174. * The 16-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
  10175. * are filled with zero. The shift amount is specified by the low-order 4-bits of the value in the Rs2
  10176. * register. For the rounding operation of the `.u` form, a value of 1 is added to the most significant
  10177. * discarded bit of each 16-bit data element to calculate the final results. And the results are written to
  10178. * Rd.
  10179. *
  10180. * **Operations**:\n
  10181. * ~~~
  10182. * sa = Rs2[3:0];
  10183. * if (sa > 0) {
  10184. * if (`.u` form) { // SRL16.u
  10185. * res[16:0] = ZE17(Rs1.H[x][15:sa-1]) + 1;
  10186. * Rd.H[x] = res[16:1];
  10187. * } else { // SRL16
  10188. * Rd.H[x] = ZE16(Rs1.H[x][15:sa]);
  10189. * }
  10190. * } else {
  10191. * Rd = Rs1;
  10192. * }
  10193. * for RV32: x=1...0,
  10194. * for RV64: x=3...0
  10195. * ~~~
  10196. *
  10197. * \param [in] a unsigned long type of value stored in a
  10198. * \param [in] b unsigned int type of value stored in b
  10199. * \return value stored in unsigned long type
  10200. */
  10201. __STATIC_FORCEINLINE unsigned long __RV_SRL16_U(unsigned long a, unsigned int b)
  10202. {
  10203. unsigned long result;
  10204. __ASM volatile("srl16.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  10205. return result;
  10206. }
  10207. /* ===== Inline Function End for 3.132.2. SRL16.u ===== */
  10208. /* ===== Inline Function Start for 3.133.1. SRLI16 ===== */
  10209. /**
  10210. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
  10211. * \brief SRLI16 (SIMD 16-bit Shift Right Logical Immediate)
  10212. * \details
  10213. * **Type**: SIMD
  10214. *
  10215. * **Syntax**:\n
  10216. * ~~~
  10217. * SRLI16 Rt, Ra, imm4u
  10218. * SRLI16.u Rt, Ra, imm4u
  10219. * ~~~
  10220. *
  10221. * **Purpose**:\n
  10222. * Do 16-bit elements logical right shift operations simultaneously. The shift amount is an
  10223. * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
  10224. *
  10225. * **Description**:\n
  10226. * The 16-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
  10227. * are filled with zero. The shift amount is specified by the imm4u constant. For the rounding
  10228. * operation of the `.u` form, a value of 1 is added to the most significant discarded bit of each 16-bit
  10229. * data element to calculate the final results. And the results are written to Rd.
  10230. *
  10231. * **Operations**:\n
  10232. * ~~~
  10233. * sa = imm4u;
  10234. * if (sa > 0) {
  10235. * if (`.u` form) { // SRLI16.u
  10236. * res[16:0] = ZE17(Rs1.H[x][15:sa-1]) + 1;
  10237. * Rd.H[x] = res[16:1];
  10238. * } else { // SRLI16
  10239. * Rd.H[x] = ZE16(Rs1.H[x][15:sa]);
  10240. * }
  10241. * } else {
  10242. * Rd = Rs1;
  10243. * }
  10244. * for RV32: x=1...0,
  10245. * for RV64: x=3...0
  10246. * ~~~
  10247. *
  10248. * \param [in] a unsigned long type of value stored in a
  10249. * \param [in] b unsigned int type of value stored in b
  10250. * \return value stored in unsigned long type
  10251. */
  #define __RV_SRLI16(a, b) \
  ({ \
      /* Evaluate the operand first: the previous expansion declared a local */ \
      /* called `result` ahead of it, which shadowed any caller variable of */ \
      /* that name used inside the argument expression. */ \
      unsigned long __rv_srli16_src = (unsigned long)(a); \
      unsigned long __rv_srli16_dst; \
      /* `b` is bound with the "K" constraint, so it is emitted as an immediate. */ \
      __ASM volatile("srli16 %0, %1, %2" : "=r"(__rv_srli16_dst) : "r"(__rv_srli16_src), "K"(b)); \
      __rv_srli16_dst; \
  })
  10259. /* ===== Inline Function End for 3.133.1. SRLI16 ===== */
  10260. /* ===== Inline Function Start for 3.133.2. SRLI16.u ===== */
  10261. /**
  10262. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
  10263. * \brief SRLI16.u (SIMD 16-bit Rounding Shift Right Logical Immediate)
  10264. * \details
  10265. * **Type**: SIMD
  10266. *
  10267. * **Syntax**:\n
  10268. * ~~~
  10269. * SRLI16 Rt, Ra, imm4u
  10270. * SRLI16.u Rt, Ra, imm4u
  10271. * ~~~
  10272. *
  10273. * **Purpose**:\n
  10274. * Do 16-bit elements logical right shift operations simultaneously. The shift amount is an
  10275. * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
  10276. *
  10277. * **Description**:\n
  10278. * The 16-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
  10279. * are filled with zero. The shift amount is specified by the imm4u constant. For the rounding
  10280. * operation of the `.u` form, a value of 1 is added to the most significant discarded bit of each 16-bit
  10281. * data element to calculate the final results. And the results are written to Rd.
  10282. *
  10283. * **Operations**:\n
  10284. * ~~~
  10285. * sa = imm4u;
  10286. * if (sa > 0) {
  10287. * if (`.u` form) { // SRLI16.u
  10288. * res[16:0] = ZE17(Rs1.H[x][15:sa-1]) + 1;
  10289. * Rd.H[x] = res[16:1];
  10290. * } else { // SRLI16
  10291. * Rd.H[x] = ZE16(Rs1.H[x][15:sa]);
  10292. * }
  10293. * } else {
  10294. * Rd = Rs1;
  10295. * }
  10296. * for RV32: x=1...0,
  10297. * for RV64: x=3...0
  10298. * ~~~
  10299. *
  10300. * \param [in] a unsigned long type of value stored in a
  10301. * \param [in] b unsigned int type of value stored in b
  10302. * \return value stored in unsigned long type
  10303. */
  #define __RV_SRLI16_U(a, b) \
  ({ \
      /* Evaluate the operand first: the previous expansion declared a local */ \
      /* called `result` ahead of it, which shadowed any caller variable of */ \
      /* that name used inside the argument expression. */ \
      unsigned long __rv_srli16u_src = (unsigned long)(a); \
      unsigned long __rv_srli16u_dst; \
      /* `b` is bound with the "K" constraint, so it is emitted as an immediate. */ \
      __ASM volatile("srli16.u %0, %1, %2" : "=r"(__rv_srli16u_dst) : "r"(__rv_srli16u_src), "K"(b)); \
      __rv_srli16u_dst; \
  })
  10311. /* ===== Inline Function End for 3.133.2. SRLI16.u ===== */
  10312. /* ===== Inline Function Start for 3.134. STAS16 ===== */
  10313. /**
  10314. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  10315. * \brief STAS16 (SIMD 16-bit Straight Addition & Subtraction)
  10316. * \details
  10317. * **Type**: SIMD
  10318. *
  10319. * **Syntax**:\n
  10320. * ~~~
  10321. * STAS16 Rd, Rs1, Rs2
  10322. * ~~~
  10323. *
  10324. * **Purpose**:\n
  10325. * Do 16-bit integer element addition and 16-bit integer element subtraction in a 32-bit
  10326. * chunk simultaneously. Operands are from corresponding positions in 32-bit chunks.
  10327. *
  10328. * **Description**:\n
  * This instruction adds the 16-bit integer element in [31:16] of 32-bit chunks in Rs1 with
  * the 16-bit integer element in [31:16] of 32-bit chunks in Rs2, and writes the result to [31:16] of 32-bit
  * chunks in Rd; at the same time, it subtracts the 16-bit integer element in [15:0] of 32-bit chunks in
  * Rs2 from the 16-bit integer element in [15:0] of 32-bit chunks in Rs1, and writes the result to [15:0]
  * of 32-bit chunks in Rd.
  10334. *
  10335. * **Note**:\n
  10336. * This instruction can be used for either signed or unsigned operations.
  10337. *
  10338. * **Operations**:\n
  10339. * ~~~
  10340. * Rd.W[x][31:16] = Rs1.W[x][31:16] + Rs2.W[x][31:16];
  10341. * Rd.W[x][15:0] = Rs1.W[x][15:0] - Rs2.W[x][15:0];
  10342. * for RV32, x=0
  10343. * for RV64, x=1...0
  10344. * ~~~
  10345. *
  10346. * \param [in] a unsigned long type of value stored in a
  10347. * \param [in] b unsigned long type of value stored in b
  10348. * \return value stored in unsigned long type
  10349. */
  10350. __STATIC_FORCEINLINE unsigned long __RV_STAS16(unsigned long a, unsigned long b)
  10351. {
  10352. unsigned long result;
  10353. __ASM volatile("stas16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  10354. return result;
  10355. }
  10356. /* ===== Inline Function End for 3.134. STAS16 ===== */
  10357. /* ===== Inline Function Start for 3.135. STSA16 ===== */
  10358. /**
  10359. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  10360. * \brief STSA16 (SIMD 16-bit Straight Subtraction & Addition)
  10361. * \details
  10362. * **Type**: SIMD
  10363. *
  10364. * **Syntax**:\n
  10365. * ~~~
  10366. * STSA16 Rd, Rs1, Rs2
  10367. * ~~~
  10368. *
  10369. * **Purpose**:\n
  10370. * Do 16-bit integer element subtraction and 16-bit integer element addition in a 32-bit
  10371. * chunk simultaneously. Operands are from corresponding positions in 32-bit chunks.
  10372. *
  10373. * **Description**:\n
  10374. * This instruction subtracts the 16-bit integer element in [31:16] of 32-bit chunks in Rs2
  10375. * from the 16-bit integer element in [31:16] of 32-bit chunks in Rs1, and writes the result to [31:16] of
  10376. * 32-bit chunks in Rd; at the same time, it adds the 16-bit integer element in [15:0] of 32-bit chunks in
  10377. * Rs2 with the 16-bit integer element in [15:0] of 32-bit chunks in Rs1, and writes the result to [15:0] of
  10378. * 32-bit chunks in Rd.
  10379. *
  10380. * **Note**:\n
  10381. * This instruction can be used for either signed or unsigned operations.
  10382. *
  10383. * **Operations**:\n
  10384. * ~~~
  10385. * Rd.W[x][31:16] = Rs1.W[x][31:16] - Rs2.W[x][31:16];
  10386. * Rd.W[x][15:0] = Rs1.W[x][15:0] + Rs2.W[x][15:0];
  10387. * for RV32, x=0
  10388. * for RV64, x=1...0
  10389. * ~~~
  10390. *
  10391. * \param [in] a unsigned long type of value stored in a
  10392. * \param [in] b unsigned long type of value stored in b
  10393. * \return value stored in unsigned long type
  10394. */
  10395. __STATIC_FORCEINLINE unsigned long __RV_STSA16(unsigned long a, unsigned long b)
  10396. {
  10397. unsigned long result;
  10398. __ASM volatile("stsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  10399. return result;
  10400. }
  10401. /* ===== Inline Function End for 3.135. STSA16 ===== */
  10402. /* ===== Inline Function Start for 3.136. SUB8 ===== */
  10403. /**
  10404. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
  10405. * \brief SUB8 (SIMD 8-bit Subtraction)
  10406. * \details
  10407. * **Type**: SIMD
  10408. *
  10409. * **Syntax**:\n
  10410. * ~~~
  10411. * SUB8 Rd, Rs1, Rs2
  10412. * ~~~
  10413. *
  10414. * **Purpose**:\n
  10415. * Do 8-bit integer element subtractions simultaneously.
  10416. *
  10417. * **Description**:\n
  10418. * This instruction subtracts the 8-bit integer elements in Rs2 from the 8-bit integer
  10419. * elements in Rs1, and then writes the result to Rd.
  10420. *
  10421. * **Note**:\n
  10422. * This instruction can be used for either signed or unsigned subtraction.
  10423. *
  10424. * **Operations**:\n
  10425. * ~~~
  10426. * Rd.B[x] = Rs1.B[x] - Rs2.B[x];
  10427. * for RV32: x=3...0,
  10428. * for RV64: x=7...0
  10429. * ~~~
  10430. *
  10431. * \param [in] a unsigned long type of value stored in a
  10432. * \param [in] b unsigned long type of value stored in b
  10433. * \return value stored in unsigned long type
  10434. */
  10435. __STATIC_FORCEINLINE unsigned long __RV_SUB8(unsigned long a, unsigned long b)
  10436. {
  10437. unsigned long result;
  10438. __ASM volatile("sub8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  10439. return result;
  10440. }
  10441. /* ===== Inline Function End for 3.136. SUB8 ===== */
  10442. /* ===== Inline Function Start for 3.137. SUB16 ===== */
  10443. /**
  10444. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  10445. * \brief SUB16 (SIMD 16-bit Subtraction)
  10446. * \details
  10447. * **Type**: SIMD
  10448. *
  10449. * **Syntax**:\n
  10450. * ~~~
  10451. * SUB16 Rd, Rs1, Rs2
  10452. * ~~~
  10453. *
  10454. * **Purpose**:\n
  10455. * Do 16-bit integer element subtractions simultaneously.
  10456. *
  10457. * **Description**:\n
  10458. * This instruction subtracts the 16-bit integer elements in Rs2 from the 16-bit integer
  10459. * elements in Rs1, and then writes the result to Rd.
  10460. *
  10461. * **Note**:\n
  10462. * This instruction can be used for either signed or unsigned subtraction.
  10463. *
  10464. * **Operations**:\n
  10465. * ~~~
  10466. * Rd.H[x] = Rs1.H[x] - Rs2.H[x];
  10467. * for RV32: x=1...0,
  10468. * for RV64: x=3...0
  10469. * ~~~
  10470. *
  10471. * \param [in] a unsigned long type of value stored in a
  10472. * \param [in] b unsigned long type of value stored in b
  10473. * \return value stored in unsigned long type
  10474. */
  10475. __STATIC_FORCEINLINE unsigned long __RV_SUB16(unsigned long a, unsigned long b)
  10476. {
  10477. unsigned long result;
  10478. __ASM volatile("sub16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  10479. return result;
  10480. }
  10481. /* ===== Inline Function End for 3.137. SUB16 ===== */
  10482. /* ===== Inline Function Start for 3.138. SUB64 ===== */
  10483. /**
  10484. * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
  10485. * \brief SUB64 (64-bit Subtraction)
  10486. * \details
  10487. * **Type**: DSP (64-bit Profile)
  10488. *
  10489. * **Syntax**:\n
  10490. * ~~~
  10491. * SUB64 Rd, Rs1, Rs2
  10492. * ~~~
  10493. *
  10494. * **Purpose**:\n
  10495. * Perform a 64-bit signed or unsigned integer subtraction.
  10496. *
  10497. * **RV32 Description**:\n
  10498. * This instruction subtracts the 64-bit integer of an even/odd pair of registers
  10499. * specified by Rs2(4,1) from the 64-bit integer of an even/odd pair of registers specified by Rs1(4,1),
  10500. * and then writes the 64-bit result to an even/odd pair of registers specified by Rd(4,1).
  10501. * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
  10502. * includes register 2d and 2d+1.
  10503. * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
  10504. * register of the pair contains the low 32-bit of the operand.
  10505. *
  10506. * **RV64 Description**:\n
  10507. * This instruction subtracts the 64-bit integer of Rs2 from the 64-bit integer of Rs1,
  10508. * and then writes the 64-bit result to Rd.
  10509. *
  10510. * **Note**:\n
  10511. * This instruction can be used for either signed or unsigned subtraction.
  10512. *
  10513. * **Operations**:\n
  10514. * ~~~
  10515. * * RV32:
  10516. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  10517. * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
  10518. * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
  10519. * R[t_H].R[t_L] = R[a_H].R[a_L] - R[b_H].R[b_L];
  10520. * * RV64:
  10521. * Rd = Rs1 - Rs2;
  10522. * ~~~
  10523. *
  10524. * \param [in] a unsigned long long type of value stored in a
  10525. * \param [in] b unsigned long long type of value stored in b
  10526. * \return value stored in unsigned long long type
  10527. */
  10528. __STATIC_FORCEINLINE unsigned long long __RV_SUB64(unsigned long long a, unsigned long long b)
  10529. {
  10530. unsigned long long result;
  10531. __ASM volatile("sub64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  10532. return result;
  10533. }
  10534. /* ===== Inline Function End for 3.138. SUB64 ===== */
  10535. /* ===== Inline Function Start for 3.139.1. SUNPKD810 ===== */
  10536. /**
  10537. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
  10538. * \brief SUNPKD810 (Signed Unpacking Bytes 1 & 0)
  10539. * \details
  10540. * **Type**: DSP
  10541. *
  10542. * **Syntax**:\n
  10543. * ~~~
  10544. * SUNPKD8xy Rd, Rs1
  10545. * xy = {10, 20, 30, 31, 32}
  10546. * ~~~
  10547. *
  10548. * **Purpose**:\n
  * Unpack byte *x* and byte *y* of 32-bit chunks in a register into two 16-bit signed halfwords
  * of 32-bit chunks in a register.
  *
  * **Description**:\n
  * For the `SUNPKD8xy` instruction, it unpacks byte *x* and byte *y* of 32-bit chunks in Rs1 into
  * two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit
  * chunks in Rd.
  10556. *
  10557. * **Operations**:\n
  10558. * ~~~
  10559. * Rd.W[m].H[1] = SE16(Rs1.W[m].B[x])
  10560. * Rd.W[m].H[0] = SE16(Rs1.W[m].B[y])
  10561. * // SUNPKD810, x=1,y=0
  10562. * // SUNPKD820, x=2,y=0
  10563. * // SUNPKD830, x=3,y=0
  10564. * // SUNPKD831, x=3,y=1
  10565. * // SUNPKD832, x=3,y=2
  10566. * for RV32: m=0,
  10567. * for RV64: m=1...0
  10568. * ~~~
  10569. *
  10570. * \param [in] a unsigned long type of value stored in a
  10571. * \return value stored in unsigned long type
  10572. */
  10573. __STATIC_FORCEINLINE unsigned long __RV_SUNPKD810(unsigned long a)
  10574. {
  10575. unsigned long result;
  10576. __ASM volatile("sunpkd810 %0, %1" : "=r"(result) : "r"(a));
  10577. return result;
  10578. }
  10579. /* ===== Inline Function End for 3.139.1. SUNPKD810 ===== */
  10580. /* ===== Inline Function Start for 3.139.2. SUNPKD820 ===== */
  10581. /**
  10582. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
  10583. * \brief SUNPKD820 (Signed Unpacking Bytes 2 & 0)
  10584. * \details
  10585. * **Type**: DSP
  10586. *
  10587. * **Syntax**:\n
  10588. * ~~~
  10589. * SUNPKD8xy Rd, Rs1
  10590. * xy = {10, 20, 30, 31, 32}
  10591. * ~~~
  10592. *
  10593. * **Purpose**:\n
  * Unpack byte *x* and byte *y* of 32-bit chunks in a register into two 16-bit signed halfwords
  * of 32-bit chunks in a register.
  *
  * **Description**:\n
  * For the `SUNPKD8xy` instruction, it unpacks byte *x* and byte *y* of 32-bit chunks in Rs1 into
  * two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit
  * chunks in Rd.
  10601. *
  10602. * **Operations**:\n
  10603. * ~~~
  10604. * Rd.W[m].H[1] = SE16(Rs1.W[m].B[x])
  10605. * Rd.W[m].H[0] = SE16(Rs1.W[m].B[y])
  10606. * // SUNPKD810, x=1,y=0
  10607. * // SUNPKD820, x=2,y=0
  10608. * // SUNPKD830, x=3,y=0
  10609. * // SUNPKD831, x=3,y=1
  10610. * // SUNPKD832, x=3,y=2
  10611. * for RV32: m=0,
  10612. * for RV64: m=1...0
  10613. * ~~~
  10614. *
  10615. * \param [in] a unsigned long type of value stored in a
  10616. * \return value stored in unsigned long type
  10617. */
  10618. __STATIC_FORCEINLINE unsigned long __RV_SUNPKD820(unsigned long a)
  10619. {
  10620. unsigned long result;
  10621. __ASM volatile("sunpkd820 %0, %1" : "=r"(result) : "r"(a));
  10622. return result;
  10623. }
  10624. /* ===== Inline Function End for 3.139.2. SUNPKD820 ===== */
  10625. /* ===== Inline Function Start for 3.139.3. SUNPKD830 ===== */
  10626. /**
  10627. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
  10628. * \brief SUNPKD830 (Signed Unpacking Bytes 3 & 0)
  10629. * \details
  10630. * **Type**: DSP
  10631. *
  10632. * **Syntax**:\n
  10633. * ~~~
  10634. * SUNPKD8xy Rd, Rs1
  10635. * xy = {10, 20, 30, 31, 32}
  10636. * ~~~
  10637. *
  10638. * **Purpose**:\n
  * Unpack byte *x* and byte *y* of 32-bit chunks in a register into two 16-bit signed halfwords
  * of 32-bit chunks in a register.
  *
  * **Description**:\n
  * For the `SUNPKD8xy` instruction, it unpacks byte *x* and byte *y* of 32-bit chunks in Rs1 into
  * two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit
  * chunks in Rd.
  10646. *
  10647. * **Operations**:\n
  10648. * ~~~
  10649. * Rd.W[m].H[1] = SE16(Rs1.W[m].B[x])
  10650. * Rd.W[m].H[0] = SE16(Rs1.W[m].B[y])
  10651. * // SUNPKD810, x=1,y=0
  10652. * // SUNPKD820, x=2,y=0
  10653. * // SUNPKD830, x=3,y=0
  10654. * // SUNPKD831, x=3,y=1
  10655. * // SUNPKD832, x=3,y=2
  10656. * for RV32: m=0,
  10657. * for RV64: m=1...0
  10658. * ~~~
  10659. *
  10660. * \param [in] a unsigned long type of value stored in a
  10661. * \return value stored in unsigned long type
  10662. */
  10663. __STATIC_FORCEINLINE unsigned long __RV_SUNPKD830(unsigned long a)
  10664. {
  10665. unsigned long result;
  10666. __ASM volatile("sunpkd830 %0, %1" : "=r"(result) : "r"(a));
  10667. return result;
  10668. }
  10669. /* ===== Inline Function End for 3.139.3. SUNPKD830 ===== */
  10670. /* ===== Inline Function Start for 3.139.4. SUNPKD831 ===== */
  10671. /**
  10672. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
  10673. * \brief SUNPKD831 (Signed Unpacking Bytes 3 & 1)
  10674. * \details
  10675. * **Type**: DSP
  10676. *
  10677. * **Syntax**:\n
  10678. * ~~~
  10679. * SUNPKD8xy Rd, Rs1
  10680. * xy = {10, 20, 30, 31, 32}
  10681. * ~~~
  10682. *
  10683. * **Purpose**:\n
  * Unpack byte *x* and byte *y* of 32-bit chunks in a register into two 16-bit signed halfwords
  * of 32-bit chunks in a register.
  *
  * **Description**:\n
  * For the `SUNPKD8xy` instruction, it unpacks byte *x* and byte *y* of 32-bit chunks in Rs1 into
  * two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit
  * chunks in Rd.
  10691. *
  10692. * **Operations**:\n
  10693. * ~~~
  10694. * Rd.W[m].H[1] = SE16(Rs1.W[m].B[x])
  10695. * Rd.W[m].H[0] = SE16(Rs1.W[m].B[y])
  10696. * // SUNPKD810, x=1,y=0
  10697. * // SUNPKD820, x=2,y=0
  10698. * // SUNPKD830, x=3,y=0
  10699. * // SUNPKD831, x=3,y=1
  10700. * // SUNPKD832, x=3,y=2
  10701. * for RV32: m=0,
  10702. * for RV64: m=1...0
  10703. * ~~~
  10704. *
  10705. * \param [in] a unsigned long type of value stored in a
  10706. * \return value stored in unsigned long type
  10707. */
  10708. __STATIC_FORCEINLINE unsigned long __RV_SUNPKD831(unsigned long a)
  10709. {
  10710. unsigned long result;
  10711. __ASM volatile("sunpkd831 %0, %1" : "=r"(result) : "r"(a));
  10712. return result;
  10713. }
  10714. /* ===== Inline Function End for 3.139.4. SUNPKD831 ===== */
  10715. /* ===== Inline Function Start for 3.139.5. SUNPKD832 ===== */
  10716. /**
  10717. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
  10718. * \brief SUNPKD832 (Signed Unpacking Bytes 3 & 2)
  10719. * \details
  10720. * **Type**: DSP
  10721. *
  10722. * **Syntax**:\n
  10723. * ~~~
  10724. * SUNPKD8xy Rd, Rs1
  10725. * xy = {10, 20, 30, 31, 32}
  10726. * ~~~
  10727. *
  10728. * **Purpose**:\n
  * Unpack byte *x* and byte *y* of 32-bit chunks in a register into two 16-bit signed halfwords
  * of 32-bit chunks in a register.
  *
  * **Description**:\n
  * For the `SUNPKD8xy` instruction, it unpacks byte *x* and byte *y* of 32-bit chunks in Rs1 into
  * two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit
  * chunks in Rd.
  10736. *
  10737. * **Operations**:\n
  10738. * ~~~
  10739. * Rd.W[m].H[1] = SE16(Rs1.W[m].B[x])
  10740. * Rd.W[m].H[0] = SE16(Rs1.W[m].B[y])
  10741. * // SUNPKD810, x=1,y=0
  10742. * // SUNPKD820, x=2,y=0
  10743. * // SUNPKD830, x=3,y=0
  10744. * // SUNPKD831, x=3,y=1
  10745. * // SUNPKD832, x=3,y=2
  10746. * for RV32: m=0,
  10747. * for RV64: m=1...0
  10748. * ~~~
  10749. *
  10750. * \param [in] a unsigned long type of value stored in a
  10751. * \return value stored in unsigned long type
  10752. */
  10753. __STATIC_FORCEINLINE unsigned long __RV_SUNPKD832(unsigned long a)
  10754. {
  10755. unsigned long result;
  10756. __ASM volatile("sunpkd832 %0, %1" : "=r"(result) : "r"(a));
  10757. return result;
  10758. }
  10759. /* ===== Inline Function End for 3.139.5. SUNPKD832 ===== */
  10760. /* ===== Inline Function Start for 3.140. SWAP8 ===== */
  10761. /**
  10762. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
  10763. * \brief SWAP8 (Swap Byte within Halfword)
  10764. * \details
  10765. * **Type**: DSP
  10766. *
  10767. * **Syntax**:\n
  10768. * ~~~
  10769. * SWAP8 Rd, Rs1
  10770. * ~~~
  10771. *
  10772. * **Purpose**:\n
  10773. * Swap the bytes within each halfword of a register.
  10774. *
  10775. * **Description**:\n
  10776. * This instruction swaps the bytes within each halfword of Rs1 and writes the result to
  10777. * Rd.
  10778. *
  10779. * **Operations**:\n
  10780. * ~~~
  10781. * Rd.H[x] = CONCAT(Rs1.H[x][7:0],Rs1.H[x][15:8]);
  10782. * for RV32: x=1...0,
  10783. * for RV64: x=3...0
  10784. * ~~~
  10785. *
  10786. * \param [in] a unsigned long type of value stored in a
  10787. * \return value stored in unsigned long type
  10788. */
  10789. __STATIC_FORCEINLINE unsigned long __RV_SWAP8(unsigned long a)
  10790. {
  10791. unsigned long result;
  10792. __ASM volatile("swap8 %0, %1" : "=r"(result) : "r"(a));
  10793. return result;
  10794. }
  10795. /* ===== Inline Function End for 3.140. SWAP8 ===== */
  10796. /* ===== Inline Function Start for 3.141. SWAP16 ===== */
  10797. /**
  10798. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
  10799. * \brief SWAP16 (Swap Halfword within Word)
  10800. * \details
  10801. * **Type**: DSP
  10802. *
  10803. * **Syntax**:\n
  10804. * ~~~
  10805. * SWAP16 Rd, Rs1
  10806. * ~~~
  10807. *
  10808. * **Purpose**:\n
  10809. * Swap the 16-bit halfwords within each word of a register.
  10810. *
  10811. * **Description**:\n
  10812. * This instruction swaps the 16-bit halfwords within each word of Rs1 and writes the
  10813. * result to Rd.
  10814. *
  10815. * **Operations**:\n
  10816. * ~~~
  10817. * Rd.W[x] = CONCAT(Rs1.W[x][15:0],Rs1.H[x][31:16]);
  10818. * for RV32: x=0,
  10819. * for RV64: x=1...0
  10820. * ~~~
  10821. *
  10822. * \param [in] a unsigned long type of value stored in a
  10823. * \return value stored in unsigned long type
  10824. */
  10825. __STATIC_FORCEINLINE unsigned long __RV_SWAP16(unsigned long a)
  10826. {
  10827. unsigned long result;
  10828. __ASM volatile("swap16 %0, %1" : "=r"(result) : "r"(a));
  10829. return result;
  10830. }
  10831. /* ===== Inline Function End for 3.141. SWAP16 ===== */
  /* ===== Inline Function Start for 3.142. UCLIP8 ===== */
  /**
   * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
   * \brief UCLIP8 (SIMD 8-bit Unsigned Clip Value)
   * \details
   * **Type**: SIMD
   *
   * **Syntax**:\n
   * ~~~
   * UCLIP8 Rd, Rs1, imm3u
   * ~~~
   *
   * **Purpose**:\n
   * Limit the 8-bit signed elements of a register into an unsigned range simultaneously.
   *
   * **Description**:\n
   * This instruction limits the 8-bit signed elements stored in Rs1 into an unsigned integer
   * range between 2^imm3u-1 and 0, and writes the limited results to Rd. For example, if imm3u
   * is 3, the 8-bit input values are saturated between 7 and 0. If saturation is performed, the
   * OV bit is set to 1.
   *
   * **Operations**:\n
   * ~~~
   * src = Rs1.B[x];
   * if (src > (2^imm3u)-1) {
   *   src = (2^imm3u)-1;
   *   OV = 1;
   * } else if (src < 0) {
   *   src = 0;
   *   OV = 1;
   * }
   * Rd.B[x] = src;
   * for RV32: x=3...0,
   * for RV64: x=7...0
   * ~~~
   *
   * Implemented as a macro because `b` is bound with the "K" (immediate) constraint and
   * therefore has to be an integer constant expression at the point of use.
   * The macro's temporaries have macro-specific names, and `a` is evaluated before the result
   * temporary is declared, so an argument expression that refers to a caller variable such as
   * `result` is not captured by the macro's own locals.
   *
   * \param [in]  a    source operand Rs1, converted to unsigned long
   * \param [in]  b    clip width imm3u; must be a compile-time constant
   * \return content of Rd after the instruction (unsigned long)
   */
  #define __RV_UCLIP8(a, b) \
      ({ \
          unsigned long __rv_uclip8_src = (unsigned long)(a); \
          unsigned long __rv_uclip8_res; \
          __ASM volatile("uclip8 %0, %1, %2" \
                         : "=r"(__rv_uclip8_res) \
                         : "r"(__rv_uclip8_src), "K"(b)); \
          __rv_uclip8_res; \
      })
  /* ===== Inline Function End for 3.142. UCLIP8 ===== */
  /* ===== Inline Function Start for 3.143. UCLIP16 ===== */
  /**
   * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
   * \brief UCLIP16 (SIMD 16-bit Unsigned Clip Value)
   * \details
   * **Type**: SIMD
   *
   * **Syntax**:\n
   * ~~~
   * UCLIP16 Rd, Rs1, imm4u
   * ~~~
   *
   * **Purpose**:\n
   * Limit the 16-bit signed elements of a register into an unsigned range simultaneously.
   *
   * **Description**:\n
   * This instruction limits the 16-bit signed elements stored in Rs1 into an unsigned integer
   * range between 2^imm4u-1 and 0, and writes the limited results to Rd. For example, if imm4u
   * is 3, the 16-bit input values are saturated between 7 and 0. If saturation is performed, the
   * OV bit is set to 1.
   *
   * **Operations**:\n
   * ~~~
   * src = Rs1.H[x];
   * if (src > (2^imm4u)-1) {
   *   src = (2^imm4u)-1;
   *   OV = 1;
   * } else if (src < 0) {
   *   src = 0;
   *   OV = 1;
   * }
   * Rd.H[x] = src;
   * for RV32: x=1...0,
   * for RV64: x=3...0
   * ~~~
   *
   * Implemented as a macro because `b` is bound with the "K" (immediate) constraint and
   * therefore has to be an integer constant expression at the point of use.
   * The macro's temporaries have macro-specific names, and `a` is evaluated before the result
   * temporary is declared, so an argument expression that refers to a caller variable such as
   * `result` is not captured by the macro's own locals.
   *
   * \param [in]  a    source operand Rs1, converted to unsigned long
   * \param [in]  b    clip width imm4u; must be a compile-time constant
   * \return content of Rd after the instruction (unsigned long)
   */
  #define __RV_UCLIP16(a, b) \
      ({ \
          unsigned long __rv_uclip16_src = (unsigned long)(a); \
          unsigned long __rv_uclip16_res; \
          __ASM volatile("uclip16 %0, %1, %2" \
                         : "=r"(__rv_uclip16_res) \
                         : "r"(__rv_uclip16_src), "K"(b)); \
          __rv_uclip16_res; \
      })
  /* ===== Inline Function End for 3.143. UCLIP16 ===== */
  /* ===== Inline Function Start for 3.144. UCLIP32 ===== */
  /**
   * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
   * \brief UCLIP32 (SIMD 32-bit Unsigned Clip Value)
   * \details
   * **Type**: SIMD
   *
   * **Syntax**:\n
   * ~~~
   * UCLIP32 Rd, Rs1, imm5u[4:0]
   * ~~~
   *
   * **Purpose**:\n
   * Limit the 32-bit signed integer elements of a register into an unsigned range
   * simultaneously.
   *
   * **Description**:\n
   * This instruction limits the 32-bit signed integer elements stored in Rs1 into an unsigned
   * integer range between 2^imm5u-1 and 0, and writes the limited results to Rd. For example, if
   * imm5u is 3, the 32-bit input values are saturated between 7 and 0. If saturation is
   * performed, the OV bit is set to 1.
   *
   * **Operations**:\n
   * ~~~
   * src = Rs1.W[x];
   * if (src > (2^imm5u)-1) {
   *   src = (2^imm5u)-1;
   *   OV = 1;
   * } else if (src < 0) {
   *   src = 0;
   *   OV = 1;
   * }
   * Rd.W[x] = src
   * for RV32: x=0,
   * for RV64: x=1...0
   * ~~~
   *
   * Implemented as a macro because `b` is bound with the "K" (immediate) constraint and
   * therefore has to be an integer constant expression at the point of use.
   * The macro's temporaries have macro-specific names, and `a` is evaluated before the result
   * temporary is declared, so an argument expression that refers to a caller variable such as
   * `result` is not captured by the macro's own locals.
   *
   * \param [in]  a    source operand Rs1, converted to unsigned long
   * \param [in]  b    clip width imm5u; must be a compile-time constant
   * \return content of Rd after the instruction (unsigned long)
   */
  #define __RV_UCLIP32(a, b) \
      ({ \
          unsigned long __rv_uclip32_src = (unsigned long)(a); \
          unsigned long __rv_uclip32_res; \
          __ASM volatile("uclip32 %0, %1, %2" \
                         : "=r"(__rv_uclip32_res) \
                         : "r"(__rv_uclip32_src), "K"(b)); \
          __rv_uclip32_res; \
      })
  /* ===== Inline Function End for 3.144. UCLIP32 ===== */
  10976. /* ===== Inline Function Start for 3.145. UCMPLE8 ===== */
  10977. /**
  10978. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP
  10979. * \brief UCMPLE8 (SIMD 8-bit Unsigned Compare Less Than & Equal)
  10980. * \details
  10981. * **Type**: SIMD
  10982. *
  10983. * **Syntax**:\n
  10984. * ~~~
  10985. * UCMPLE8 Rd, Rs1, Rs2
  10986. * ~~~
  10987. *
  10988. * **Purpose**:\n
  10989. * Do 8-bit unsigned integer elements less than & equal comparisons simultaneously.
  10990. *
  10991. * **Description**:\n
  10992. * This instruction compares the 8-bit unsigned integer elements in Rs1 with the 8-bit
  10993. * unsigned integer elements in Rs2 to see if the one in Rs1 is less than or equal to the one in Rs2. If it
  10994. * is true, the result is 0xFF; otherwise, the result is 0x0. The four comparison results are written to
  10995. * Rd.
  10996. *
  10997. * **Operations**:\n
  10998. * ~~~
  10999. * Rd.B[x] = (Rs1.B[x] <=u Rs2.B[x])? 0xff : 0x0;
  11000. * for RV32: x=3...0,
  11001. * for RV64: x=7...0
  11002. * ~~~
  11003. *
  11004. * \param [in] a unsigned long type of value stored in a
  11005. * \param [in] b unsigned long type of value stored in b
  11006. * \return value stored in unsigned long type
  11007. */
  11008. __STATIC_FORCEINLINE unsigned long __RV_UCMPLE8(unsigned long a, unsigned long b)
  11009. {
  11010. unsigned long result;
  11011. __ASM volatile("ucmple8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  11012. return result;
  11013. }
  11014. /* ===== Inline Function End for 3.145. UCMPLE8 ===== */
  11015. /* ===== Inline Function Start for 3.146. UCMPLE16 ===== */
  11016. /**
  11017. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP
  11018. * \brief UCMPLE16 (SIMD 16-bit Unsigned Compare Less Than & Equal)
  11019. * \details
  11020. * **Type**: SIMD
  11021. *
  11022. * **Syntax**:\n
  11023. * ~~~
  11024. * UCMPLE16 Rd, Rs1, Rs2
  11025. * ~~~
  11026. *
  11027. * **Purpose**:\n
  11028. * Do 16-bit unsigned integer elements less than & equal comparisons simultaneously.
  11029. *
  11030. * **Description**:\n
  11031. * This instruction compares the 16-bit unsigned integer elements in Rs1 with the 16-bit
  11032. * unsigned integer elements in Rs2 to see if the one in Rs1 is less than or equal to the one in Rs2. If it
  11033. * is true, the result is 0xFFFF; otherwise, the result is 0x0. The element comparison results are
  11034. * written to Rd.
  11035. *
  11036. * **Operations**:\n
  11037. * ~~~
  11038. * Rd.H[x] = (Rs1.H[x] <=u Rs2.H[x])? 0xffff : 0x0;
  11039. * for RV32: x=1...0,
  11040. * for RV64: x=3...0
  11041. * ~~~
  11042. *
  11043. * \param [in] a unsigned long type of value stored in a
  11044. * \param [in] b unsigned long type of value stored in b
  11045. * \return value stored in unsigned long type
  11046. */
  11047. __STATIC_FORCEINLINE unsigned long __RV_UCMPLE16(unsigned long a, unsigned long b)
  11048. {
  11049. unsigned long result;
  11050. __ASM volatile("ucmple16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  11051. return result;
  11052. }
  11053. /* ===== Inline Function End for 3.146. UCMPLE16 ===== */
  11054. /* ===== Inline Function Start for 3.147. UCMPLT8 ===== */
  11055. /**
  11056. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP
  11057. * \brief UCMPLT8 (SIMD 8-bit Unsigned Compare Less Than)
  11058. * \details
  11059. * **Type**: SIMD
  11060. *
  11061. * **Syntax**:\n
  11062. * ~~~
  11063. * UCMPLT8 Rd, Rs1, Rs2
  11064. * ~~~
  11065. *
  11066. * **Purpose**:\n
  11067. * Do 8-bit unsigned integer elements less than comparisons simultaneously.
  11068. *
  11069. * **Description**:\n
  11070. * This instruction compares the 8-bit unsigned integer elements in Rs1 with the 8-bit
  11071. * unsigned integer elements in Rs2 to see if the one in Rs1 is less than the one in Rs2. If it is true, the
  11072. * result is 0xFF; otherwise, the result is 0x0. The element comparison results are written to Rd.
  11073. *
  11074. * **Operations**:\n
  11075. * ~~~
  11076. * Rd.B[x] = (Rs1.B[x] <u Rs2.B[x])? 0xff : 0x0;
  11077. * for RV32: x=3...0,
  11078. * for RV64: x=7...0
  11079. * ~~~
  11080. *
  11081. * \param [in] a unsigned long type of value stored in a
  11082. * \param [in] b unsigned long type of value stored in b
  11083. * \return value stored in unsigned long type
  11084. */
  11085. __STATIC_FORCEINLINE unsigned long __RV_UCMPLT8(unsigned long a, unsigned long b)
  11086. {
  11087. unsigned long result;
  11088. __ASM volatile("ucmplt8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  11089. return result;
  11090. }
  11091. /* ===== Inline Function End for 3.147. UCMPLT8 ===== */
  11092. /* ===== Inline Function Start for 3.148. UCMPLT16 ===== */
  11093. /**
  11094. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP
  11095. * \brief UCMPLT16 (SIMD 16-bit Unsigned Compare Less Than)
  11096. * \details
  11097. * **Type**: SIMD
  11098. *
  11099. * **Syntax**:\n
  11100. * ~~~
  11101. * UCMPLT16 Rd, Rs1, Rs2
  11102. * ~~~
  11103. *
  11104. * **Purpose**:\n
  11105. * Do 16-bit unsigned integer elements less than comparisons simultaneously.
  11106. *
  11107. * **Description**:\n
  11108. * This instruction compares the 16-bit unsigned integer elements in Rs1 with the 16-bit
  11109. * unsigned integer elements in Rs2 to see if the one in Rs1 is less than the one in Rs2. If it is true, the
  11110. * result is 0xFFFF; otherwise, the result is 0x0. The element comparison results are written to Rd.
  11111. *
  11112. * **Operations**:\n
  11113. * ~~~
  11114. * Rd.H[x] = (Rs1.H[x] <u Rs2.H[x])? 0xffff : 0x0;
  11115. * for RV32: x=1...0,
  11116. * for RV64: x=3...0
  11117. * ~~~
  11118. *
  11119. * \param [in] a unsigned long type of value stored in a
  11120. * \param [in] b unsigned long type of value stored in b
  11121. * \return value stored in unsigned long type
  11122. */
  11123. __STATIC_FORCEINLINE unsigned long __RV_UCMPLT16(unsigned long a, unsigned long b)
  11124. {
  11125. unsigned long result;
  11126. __ASM volatile("ucmplt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  11127. return result;
  11128. }
  11129. /* ===== Inline Function End for 3.148. UCMPLT16 ===== */
  11130. /* ===== Inline Function Start for 3.149. UKADD8 ===== */
  11131. /**
  11132. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
  11133. * \brief UKADD8 (SIMD 8-bit Unsigned Saturating Addition)
  11134. * \details
  11135. * **Type**: SIMD
  11136. *
  11137. * **Syntax**:\n
  11138. * ~~~
  11139. * UKADD8 Rd, Rs1, Rs2
  11140. * ~~~
  11141. *
  11142. * **Purpose**:\n
  11143. * Do 8-bit unsigned integer element saturating additions simultaneously.
  11144. *
  11145. * **Description**:\n
  11146. * This instruction adds the 8-bit unsigned integer elements in Rs1 with the 8-bit
  11147. * unsigned integer elements in Rs2. If any of the results are beyond the 8-bit unsigned number range
  11148. * (0 <= RES <= 28-1), they are saturated to the range and the OV bit is set to 1. The saturated results are
  11149. * written to Rd.
  11150. *
  11151. * **Operations**:\n
  11152. * ~~~
  11153. * res[x] = Rs1.B[x] + Rs2.B[x];
  11154. * if (res[x] > (2^8)-1) {
  11155. * res[x] = (2^8)-1;
  11156. * OV = 1;
  11157. * }
  11158. * Rd.B[x] = res[x];
  11159. * for RV32: x=3...0,
  11160. * for RV64: x=7...0
  11161. * ~~~
  11162. *
  11163. * \param [in] a unsigned long type of value stored in a
  11164. * \param [in] b unsigned long type of value stored in b
  11165. * \return value stored in unsigned long type
  11166. */
  11167. __STATIC_FORCEINLINE unsigned long __RV_UKADD8(unsigned long a, unsigned long b)
  11168. {
  11169. unsigned long result;
  11170. __ASM volatile("ukadd8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  11171. return result;
  11172. }
  11173. /* ===== Inline Function End for 3.149. UKADD8 ===== */
  11174. /* ===== Inline Function Start for 3.150. UKADD16 ===== */
  11175. /**
  11176. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  11177. * \brief UKADD16 (SIMD 16-bit Unsigned Saturating Addition)
  11178. * \details
  11179. * **Type**: SIMD
  11180. *
  11181. * **Syntax**:\n
  11182. * ~~~
  11183. * UKADD16 Rd, Rs1, Rs2
  11184. * ~~~
  11185. *
  11186. * **Purpose**:\n
  11187. * Do 16-bit unsigned integer element saturating additions simultaneously.
  11188. *
  11189. * **Description**:\n
  11190. * This instruction adds the 16-bit unsigned integer elements in Rs1 with the 16-bit
  11191. * unsigned integer elements in Rs2. If any of the results are beyond the 16-bit unsigned number
  11192. * range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set to 1. The saturated
  11193. * results are written to Rd.
  11194. *
  11195. * **Operations**:\n
  11196. * ~~~
  11197. * res[x] = Rs1.H[x] + Rs2.H[x];
  11198. * if (res[x] > (2^16)-1) {
  11199. * res[x] = (2^16)-1;
  11200. * OV = 1;
  11201. * }
  11202. * Rd.H[x] = res[x];
  11203. * for RV32: x=1...0,
  11204. * for RV64: x=3...0
  11205. * ~~~
  11206. *
  11207. * \param [in] a unsigned long type of value stored in a
  11208. * \param [in] b unsigned long type of value stored in b
  11209. * \return value stored in unsigned long type
  11210. */
  11211. __STATIC_FORCEINLINE unsigned long __RV_UKADD16(unsigned long a, unsigned long b)
  11212. {
  11213. unsigned long result;
  11214. __ASM volatile("ukadd16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  11215. return result;
  11216. }
  11217. /* ===== Inline Function End for 3.150. UKADD16 ===== */
  11218. /* ===== Inline Function Start for 3.151. UKADD64 ===== */
  11219. /**
  11220. * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
  11221. * \brief UKADD64 (64-bit Unsigned Saturating Addition)
  11222. * \details
  11223. * **Type**: DSP (64-bit Profile)
  11224. *
  11225. * **Syntax**:\n
  11226. * ~~~
  11227. * UKADD64 Rd, Rs1, Rs2
  11228. * ~~~
  11229. *
  11230. * **Purpose**:\n
  11231. * Add two 64-bit unsigned integers. The result is saturated to the U64 range.
  11232. *
  11233. * **RV32 Description**:\n
  11234. * This instruction adds the 64-bit unsigned integer of an even/odd pair of registers
  11235. * specified by Rs1(4,1) with the 64-bit unsigned integer of an even/odd pair of registers specified by
  11236. * Rs2(4,1). If the 64-bit result is beyond the U64 number range (0 <= U64 <= 2^64-1), it is saturated to the
  11237. * range and the OV bit is set to 1. The saturated result is written to an even/odd pair of registers
  11238. * specified by Rd(4,1).
  11239. * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
  11240. * includes register 2d and 2d+1.
  11241. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
  11242. * of the pair contains the low 32-bit of the result.
  11243. *
  11244. * **RV64 Description**:\n
  11245. * This instruction adds the 64-bit unsigned integer in Rs1 with the 64-bit unsigned
  11246. * integer in Rs2. If the 64-bit result is beyond the U64 number range (0 <= U64 <= 2^64-1), it is saturated to
  11247. * the range and the OV bit is set to 1. The saturated result is written to Rd.
  11248. *
  11249. * **Operations**:\n
  11250. * ~~~
  11251. * * RV32:
  11252. * t_L = CONCAT(Rt(4,1),1'b0); t_H = CONCAT(Rt(4,1),1'b1);
  11253. * a_L = CONCAT(Ra(4,1),1'b0); a_H = CONCAT(Ra(4,1),1'b1);
  11254. * b_L = CONCAT(Rb(4,1),1'b0); b_H = CONCAT(Rb(4,1),1'b1);
  11255. * result = R[a_H].R[a_L] + R[b_H].R[b_L];
  11256. * if (result > (2^64)-1) {
  11257. * result = (2^64)-1; OV = 1;
  11258. * }
  11259. * R[t_H].R[t_L] = result;
  11260. * * RV64:
  11261. * result = Rs1 + Rs2;
  11262. * if (result > (2^64)-1) {
  11263. * result = (2^64)-1; OV = 1;
  11264. * }
  11265. * Rd = result;
  11266. * ~~~
  11267. *
  11268. * \param [in] a unsigned long long type of value stored in a
  11269. * \param [in] b unsigned long long type of value stored in b
  11270. * \return value stored in unsigned long long type
  11271. */
  11272. __STATIC_FORCEINLINE unsigned long long __RV_UKADD64(unsigned long long a, unsigned long long b)
  11273. {
  11274. unsigned long long result;
  11275. __ASM volatile("ukadd64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  11276. return result;
  11277. }
  11278. /* ===== Inline Function End for 3.151. UKADD64 ===== */
  11279. /* ===== Inline Function Start for 3.152. UKADDH ===== */
  11280. /**
  11281. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
  11282. * \brief UKADDH (Unsigned Addition with U16 Saturation)
  11283. * \details
  11284. * **Type**: DSP
  11285. *
  11286. * **Syntax**:\n
  11287. * ~~~
  11288. * UKADDH Rd, Rs1, Rs2
  11289. * ~~~
  11290. *
  11291. * **Purpose**:\n
  11292. * Add the unsigned lower 32-bit content of two registers with U16 saturation.
  11293. *
  11294. * **Description**:\n
  11295. * The unsigned lower 32-bit content of Rs1 is added with the unsigned lower 32-bit
  11296. * content of Rs2. And the result is saturated to the 16-bit unsigned integer range of [0, 2^16-1] and then
  11297. * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag.
  11298. *
  11299. * **Operations**:\n
  11300. * ~~~
  11301. * tmp = Rs1.W[0] + Rs2.W[0];
  11302. * if (tmp > (2^16)-1) {
  11303. * tmp = (2^16)-1;
  11304. * OV = 1;
  11305. * }
  11306. * Rd = SE(tmp[15:0]);
  11307. * ~~~
  11308. *
  11309. * \param [in] a unsigned int type of value stored in a
  11310. * \param [in] b unsigned int type of value stored in b
  11311. * \return value stored in unsigned long type
  11312. */
  11313. __STATIC_FORCEINLINE unsigned long __RV_UKADDH(unsigned int a, unsigned int b)
  11314. {
  11315. unsigned long result;
  11316. __ASM volatile("ukaddh %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  11317. return result;
  11318. }
  11319. /* ===== Inline Function End for 3.152. UKADDH ===== */
  11320. /* ===== Inline Function Start for 3.153. UKADDW ===== */
  11321. /**
  11322. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
  11323. * \brief UKADDW (Unsigned Addition with U32 Saturation)
  11324. * \details
  11325. * **Type**: DSP
  11326. *
  11327. * **Syntax**:\n
  11328. * ~~~
  11329. * UKADDW Rd, Rs1, Rs2
  11330. * ~~~
  11331. *
  11332. * **Purpose**:\n
  11333. * Add the unsigned lower 32-bit content of two registers with U32 saturation.
  11334. *
  11335. * **Description**:\n
  11336. * The unsigned lower 32-bit content of Rs1 is added with the unsigned lower 32-bit
  11337. * content of Rs2. And the result is saturated to the 32-bit unsigned integer range of [0, 2^32-1] and then
  11338. * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag.
  11339. *
  11340. * **Operations**:\n
  11341. * ~~~
  11342. * tmp = Rs1.W[0] + Rs2.W[0];
  11343. * if (tmp > (2^32)-1) {
  11344. * tmp[31:0] = (2^32)-1;
  11345. * OV = 1;
  11346. * }
  11347. * Rd = tmp[31:0]; // RV32
  11348. * Rd = SE(tmp[31:0]); // RV64
  11349. * ~~~
  11350. *
  11351. * \param [in] a unsigned int type of value stored in a
  11352. * \param [in] b unsigned int type of value stored in b
  11353. * \return value stored in unsigned long type
  11354. */
  11355. __STATIC_FORCEINLINE unsigned long __RV_UKADDW(unsigned int a, unsigned int b)
  11356. {
  11357. unsigned long result;
  11358. __ASM volatile("ukaddw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  11359. return result;
  11360. }
  11361. /* ===== Inline Function End for 3.153. UKADDW ===== */
  11362. /* ===== Inline Function Start for 3.154. UKCRAS16 ===== */
  11363. /**
  11364. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  11365. * \brief UKCRAS16 (SIMD 16-bit Unsigned Saturating Cross Addition & Subtraction)
  11366. * \details
  11367. * **Type**: SIMD
  11368. *
  11369. * **Syntax**:\n
  11370. * ~~~
  11371. * UKCRAS16 Rd, Rs1, Rs2
  11372. * ~~~
  11373. *
  11374. * **Purpose**:\n
  11375. * Do one 16-bit unsigned integer element saturating addition and one 16-bit unsigned
  11376. * integer element saturating subtraction in a 32-bit chunk simultaneously. Operands are from crossed
  11377. * positions in 32-bit chunks.
  11378. *
  11379. * **Description**:\n
  11380. * This instruction adds the 16-bit unsigned integer element in [31:16] of 32-bit chunks in
  11381. * Rs1 with the 16-bit unsigned integer element in [15:0] of 32-bit chunks in Rs2; at the same time, it
  11382. * subtracts the 16-bit unsigned integer element in [31:16] of 32-bit chunks in Rs2 from the 16-bit
  11383. * unsigned integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the 16-bit
  11384. * unsigned number range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set to 1.
  11385. * The saturated results are written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit
  11386. * chunks in Rd for subtraction.
  11387. *
  11388. * **Operations**:\n
  11389. * ~~~
  11390. * res1 = Rs1.W[x][31:16] + Rs2.W[x][15:0];
  11391. * res2 = Rs1.W[x][15:0] - Rs2.W[x][31:16];
  11392. * if (res1 > (2^16)-1) {
  11393. * res1 = (2^16)-1;
  11394. * OV = 1;
  11395. * }
  11396. * if (res2 < 0) {
  11397. * res2 = 0;
  11398. * OV = 1;
  11399. * }
  11400. * Rd.W[x][31:16] = res1;
  11401. * Rd.W[x][15:0] = res2;
  11402. * for RV32, x=0
  11403. * for RV64, x=1...0
  11404. * ~~~
  11405. *
  11406. * \param [in] a unsigned long type of value stored in a
  11407. * \param [in] b unsigned long type of value stored in b
  11408. * \return value stored in unsigned long type
  11409. */
  11410. __STATIC_FORCEINLINE unsigned long __RV_UKCRAS16(unsigned long a, unsigned long b)
  11411. {
  11412. unsigned long result;
  11413. __ASM volatile("ukcras16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  11414. return result;
  11415. }
  11416. /* ===== Inline Function End for 3.154. UKCRAS16 ===== */
  11417. /* ===== Inline Function Start for 3.155. UKCRSA16 ===== */
  11418. /**
  11419. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  11420. * \brief UKCRSA16 (SIMD 16-bit Unsigned Saturating Cross Subtraction & Addition)
  11421. * \details
  11422. * **Type**: SIMD
  11423. *
  11424. * **Syntax**:\n
  11425. * ~~~
  11426. * UKCRSA16 Rd, Rs1, Rs2
  11427. * ~~~
  11428. *
  11429. * **Purpose**:\n
  11430. * Do one 16-bit unsigned integer element saturating subtraction and one 16-bit unsigned
  11431. * integer element saturating addition in a 32-bit chunk simultaneously. Operands are from crossed
  11432. * positions in 32-bit chunks.
  11433. *
  11434. * **Description**:\n
  11435. * This instruction subtracts the 16-bit unsigned integer element in [15:0] of 32-bit
  11436. * chunks in Rs2 from the 16-bit unsigned integer element in [31:16] of 32-bit chunks in Rs1; at the
  11437. * same time, it adds the 16-bit unsigned integer element in [31:16] of 32-bit chunks in Rs2 with the 16-
  11438. * bit unsigned integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the
  11439. * 16-bit unsigned number range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set
  11440. * to 1. The saturated results are written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of
  11441. * 32-bit chunks in Rd for addition.
  11442. *
  11443. * **Operations**:\n
  11444. * ~~~
  11445. * res1 = Rs1.W[x][31:16] - Rs2.W[x][15:0];
  11446. * res2 = Rs1.W[x][15:0] + Rs2.W[x][31:16];
  11447. * if (res1 < 0) {
  11448. * res1 = 0;
  11449. * OV = 1;
  11450. * } else if (res2 > (2^16)-1) {
  11451. * res2 = (2^16)-1;
  11452. * OV = 1;
  11453. * }
  11454. * Rd.W[x][31:16] = res1;
  11455. * Rd.W[x][15:0] = res2;
  11456. * for RV32, x=0
  11457. * for RV64, x=1...0
  11458. * ~~~
  11459. *
  11460. * \param [in] a unsigned long type of value stored in a
  11461. * \param [in] b unsigned long type of value stored in b
  11462. * \return value stored in unsigned long type
  11463. */
  11464. __STATIC_FORCEINLINE unsigned long __RV_UKCRSA16(unsigned long a, unsigned long b)
  11465. {
  11466. unsigned long result;
  11467. __ASM volatile("ukcrsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  11468. return result;
  11469. }
  11470. /* ===== Inline Function End for 3.155. UKCRSA16 ===== */
  11471. /* ===== Inline Function Start for 3.156. UKMAR64 ===== */
  11472. /**
  11473. * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
  11474. * \brief UKMAR64 (Unsigned Multiply and Saturating Add to 64-Bit Data)
  11475. * \details
  11476. * **Type**: DSP (64-bit Profile)
  11477. *
  11478. * **Syntax**:\n
  11479. * ~~~
  11480. * UKMAR64 Rd, Rs1, Rs2
  11481. * ~~~
  11482. *
  11483. * **Purpose**:\n
  11484. * Multiply the 32-bit unsigned elements in two registers and add the 64-bit multiplication
  11485. * results to the 64-bit unsigned data of a pair of registers (RV32) or a register (RV64). The result is
  11486. * saturated to the U64 range and written back to the pair of registers (RV32) or the register (RV64).
  11487. *
  11488. * **RV32 Description**:\n
  11489. * This instruction multiplies the 32-bit unsigned data of Rs1 with that of Rs2. It
  11490. * adds the 64-bit multiplication result to the 64-bit unsigned data of an even/odd pair of registers
  11491. * specified by Rd(4,1) with unlimited precision. If the 64-bit addition result is beyond the U64 number
  11492. * range (0 <= U64 <= 2^64-1), it is saturated to the range and the OV bit is set to 1. The saturated result is
  11493. * written back to the even/odd pair of registers specified by Rd(4,1).
  11494. * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
  11495. * includes register 2d and 2d+1.
  11496. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
  11497. * of the pair contains the low 32-bit of the result.
  11498. *
  11499. * **RV64 Description**:\n
  11500. * This instruction multiplies the 32-bit unsigned elements of Rs1 with that of Rs2.
  11501. * It adds the 64-bit multiplication results to the 64-bit unsigned data in Rd with unlimited precision. If
  11502. * the 64-bit addition result is beyond the U64 number range (0 <= U64 <= 2^64-1), it is saturated to the
  11503. * range and the OV bit is set to 1. The saturated result is written back to Rd.
  11504. *
  11505. * **Operations**:\n
  11506. * ~~~
  11507. * * RV32:
  11508. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  11509. * result = R[t_H].R[t_L] + (Rs1 * Rs2);
  11510. * if (result > (2^64)-1) {
  11511. * result = (2^64)-1; OV = 1;
  11512. * }
  11513. * R[t_H].R[t_L] = result;
  11514. * * RV64:
  11515. * // `result` has unlimited precision
  11516. * result = Rd + (Rs1.W[0] u* Rs2.W[0]) + (Rs1.W[1] u* Rs2.W[1]);
  11517. * if (result > (2^64)-1) {
  11518. * result = (2^64)-1; OV = 1;
  11519. * }
  11520. * Rd = result;
  11521. * ~~~
  11522. *
  11523. * \param [in] t unsigned long long type of value stored in t
  11524. * \param [in] a unsigned long type of value stored in a
  11525. * \param [in] b unsigned long type of value stored in b
  11526. * \return value stored in unsigned long long type
  11527. */
  11528. __STATIC_FORCEINLINE unsigned long long __RV_UKMAR64(unsigned long long t, unsigned long a, unsigned long b)
  11529. {
  11530. __ASM volatile("ukmar64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  11531. return t;
  11532. }
  11533. /* ===== Inline Function End for 3.156. UKMAR64 ===== */
  11534. /* ===== Inline Function Start for 3.157. UKMSR64 ===== */
  11535. /**
  11536. * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
  11537. * \brief UKMSR64 (Unsigned Multiply and Saturating Subtract from 64-Bit Data)
  11538. * \details
  11539. * **Type**: DSP (64-bit Profile)
  11540. *
  11541. * **Syntax**:\n
  11542. * ~~~
  11543. * UKMSR64 Rd, Rs1, Rs2
  11544. * ~~~
  11545. *
  11546. * **Purpose**:\n
  11547. * Multiply the 32-bit unsigned elements in two registers and subtract the 64-bit
  11548. * multiplication results from the 64-bit unsigned data of a pair of registers (RV32) or a register (RV64).
  11549. * The result is saturated to the U64 range and written back to the pair of registers (RV32) or a register
  11550. * (RV64).
  11551. *
  11552. * **RV32 Description**:\n
  11553. * This instruction multiplies the 32-bit unsigned data of Rs1 with that of Rs2. It
  11554. * subtracts the 64-bit multiplication result from the 64-bit unsigned data of an even/odd pair of
  11555. * registers specified by Rd(4,1) with unlimited precision. If the 64-bit subtraction result is beyond the
  11556. * U64 number range (0 <= U64 <= 2^64-1), it is saturated to the range and the OV bit is set to 1. The
  11557. * saturated result is written back to the even/odd pair of registers specified by Rd(4,1).
  11558. * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
  11559. * includes register 2d and 2d+1.
  11560. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
  11561. * of the pair contains the low 32-bit of the result.
  11562. *
  11563. * **RV64 Description**:\n
  11564. * This instruction multiplies the 32-bit unsigned elements of Rs1 with that of Rs2.
  11565. * It subtracts the 64-bit multiplication results from the 64-bit unsigned data of Rd with unlimited
  11566. * precision. If the 64-bit subtraction result is beyond the U64 number range (0 <= U64 <= 2^64-1), it is
  11567. * saturated to the range and the OV bit is set to 1. The saturated result is written back to Rd.
  11568. *
  11569. * **Operations**:\n
  11570. * ~~~
  11571. * * RV32:
  11572. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  11573. * result = R[t_H].R[t_L] - (Rs1 u* Rs2);
  11574. * if (result < 0) {
  11575. * result = 0; OV = 1;
  11576. * }
  11577. * R[t_H].R[t_L] = result;
  11578. * * RV64:
  11579. * // `result` has unlimited precision
  11580. * result = Rd - (Rs1.W[0] u* Rs2.W[0]) - (Rs1.W[1] u* Rs2.W[1]);
  11581. * if (result < 0) {
  11582. * result = 0; OV = 1;
  11583. * }
  11584. * Rd = result;
  11585. * ~~~
  11586. *
  11587. * \param [in] t unsigned long long type of value stored in t
  11588. * \param [in] a unsigned long type of value stored in a
  11589. * \param [in] b unsigned long type of value stored in b
  11590. * \return value stored in unsigned long long type
  11591. */
  11592. __STATIC_FORCEINLINE unsigned long long __RV_UKMSR64(unsigned long long t, unsigned long a, unsigned long b)
  11593. {
  11594. __ASM volatile("ukmsr64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  11595. return t;
  11596. }
  11597. /* ===== Inline Function End for 3.157. UKMSR64 ===== */
  11598. /* ===== Inline Function Start for 3.158. UKSTAS16 ===== */
  11599. /**
  11600. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  11601. * \brief UKSTAS16 (SIMD 16-bit Unsigned Saturating Straight Addition & Subtraction)
  11602. * \details
  11603. * **Type**: SIMD
  11604. *
  11605. * **Syntax**:\n
  11606. * ~~~
  11607. * UKSTAS16 Rd, Rs1, Rs2
  11608. * ~~~
  11609. *
  11610. * **Purpose**:\n
  11611. * Do one 16-bit unsigned integer element saturating addition and one 16-bit unsigned
  11612. * integer element saturating subtraction in a 32-bit chunk simultaneously. Operands are from
  11613. * corresponding positions in 32-bit chunks.
  11614. *
  11615. * **Description**:\n
  11616. * This instruction adds the 16-bit unsigned integer element in [31:16] of 32-bit chunks in
  11617. * Rs1 with the 16-bit unsigned integer element in [31:16] of 32-bit chunks in Rs2; at the same time, it
  11618. * subtracts the 16-bit unsigned integer element in [15:0] of 32-bit chunks in Rs2 from the 16-bit
  11619. * unsigned integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the 16-bit
  11620. * unsigned number range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set to 1.
  11621. * The saturated results are written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit
  11622. * chunks in Rd for subtraction.
  11623. *
  11624. * **Operations**:\n
  11625. * ~~~
  11626. * res1 = Rs1.W[x][31:16] + Rs2.W[x][31:16];
  11627. * res2 = Rs1.W[x][15:0] - Rs2.W[x][15:0];
  11628. * if (res1 > (2^16)-1) {
  11629. * res1 = (2^16)-1;
  11630. * OV = 1;
  11631. * }
  11632. * if (res2 < 0) {
  11633. * res2 = 0;
  11634. * OV = 1;
  11635. * }
  11636. * Rd.W[x][31:16] = res1;
  11637. * Rd.W[x][15:0] = res2;
  11638. * for RV32, x=0
  11639. * for RV64, x=1...0
  11640. * ~~~
  11641. *
  11642. * \param [in] a unsigned long type of value stored in a
  11643. * \param [in] b unsigned long type of value stored in b
  11644. * \return value stored in unsigned long type
  11645. */
  11646. __STATIC_FORCEINLINE unsigned long __RV_UKSTAS16(unsigned long a, unsigned long b)
  11647. {
  11648. unsigned long result;
  11649. __ASM volatile("ukstas16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  11650. return result;
  11651. }
  11652. /* ===== Inline Function End for 3.158. UKSTAS16 ===== */
  11653. /* ===== Inline Function Start for 3.159. UKSTSA16 ===== */
  11654. /**
  11655. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  11656. * \brief UKSTSA16 (SIMD 16-bit Unsigned Saturating Straight Subtraction & Addition)
  11657. * \details
  11658. * **Type**: SIMD
  11659. *
  11660. * **Syntax**:\n
  11661. * ~~~
  11662. * UKSTSA16 Rd, Rs1, Rs2
  11663. * ~~~
  11664. *
  11665. * **Purpose**:\n
  11666. * Do one 16-bit unsigned integer element saturating subtraction and one 16-bit unsigned
  11667. * integer element saturating addition in a 32-bit chunk simultaneously. Operands are from
  11668. * corresponding positions in 32-bit chunks.
  11669. *
  11670. * **Description**:\n
  11671. * This instruction subtracts the 16-bit unsigned integer element in [31:16] of 32-bit
  11672. * chunks in Rs2 from the 16-bit unsigned integer element in [31:16] of 32-bit chunks in Rs1; at the
  11673. * same time, it adds the 16-bit unsigned integer element in [15:0] of 32-bit chunks in Rs2 with the 16-
  11674. * bit unsigned integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the
  11675. * 16-bit unsigned number range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set
  11676. * to 1. The saturated results are written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of
  11677. * 32-bit chunks in Rd for addition.
  11678. *
  11679. * **Operations**:\n
  11680. * ~~~
  11681. * res1 = Rs1.W[x][31:16] - Rs2.W[x][31:16];
  11682. * res2 = Rs1.W[x][15:0] + Rs2.W[x][15:0];
  11683. * if (res1 < 0) {
  11684. * res1 = 0;
  11685. * OV = 1;
   * }
   * if (res2 > (2^16)-1) {
  11687. * res2 = (2^16)-1;
  11688. * OV = 1;
  11689. * }
  11690. * Rd.W[x][31:16] = res1;
  11691. * Rd.W[x][15:0] = res2;
  11692. * for RV32, x=0
  11693. * for RV64, x=1...0
  11694. * ~~~
  11695. *
  11696. * \param [in] a unsigned long type of value stored in a
  11697. * \param [in] b unsigned long type of value stored in b
  11698. * \return value stored in unsigned long type
  11699. */
  11700. __STATIC_FORCEINLINE unsigned long __RV_UKSTSA16(unsigned long a, unsigned long b)
  11701. {
  11702. unsigned long result;
  11703. __ASM volatile("ukstsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  11704. return result;
  11705. }
  11706. /* ===== Inline Function End for 3.159. UKSTSA16 ===== */
  11707. /* ===== Inline Function Start for 3.160. UKSUB8 ===== */
  11708. /**
  11709. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
  11710. * \brief UKSUB8 (SIMD 8-bit Unsigned Saturating Subtraction)
  11711. * \details
  11712. * **Type**: SIMD
  11713. *
  11714. * **Syntax**:\n
  11715. * ~~~
  11716. * UKSUB8 Rd, Rs1, Rs2
  11717. * ~~~
  11718. *
  11719. * **Purpose**:\n
  11720. * Do 8-bit unsigned integer elements saturating subtractions simultaneously.
  11721. *
  11722. * **Description**:\n
  11723. * This instruction subtracts the 8-bit unsigned integer elements in Rs2 from the 8-bit
  11724. * unsigned integer elements in Rs1. If any of the results are beyond the 8-bit unsigned number range
   * (0 <= RES <= 2^8-1), they are saturated to the range and the OV bit is set to 1. The saturated results are
  11726. * written to Rd.
  11727. *
  11728. * **Operations**:\n
  11729. * ~~~
  11730. * res[x] = Rs1.B[x] - Rs2.B[x];
  11731. * if (res[x] < 0) {
  11732. * res[x] = 0;
  11733. * OV = 1;
  11734. * }
  11735. * Rd.B[x] = res[x];
  11736. * for RV32: x=3...0,
  11737. * for RV64: x=7...0
  11738. * ~~~
  11739. *
  11740. * \param [in] a unsigned long type of value stored in a
  11741. * \param [in] b unsigned long type of value stored in b
  11742. * \return value stored in unsigned long type
  11743. */
  11744. __STATIC_FORCEINLINE unsigned long __RV_UKSUB8(unsigned long a, unsigned long b)
  11745. {
  11746. unsigned long result;
  11747. __ASM volatile("uksub8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  11748. return result;
  11749. }
  11750. /* ===== Inline Function End for 3.160. UKSUB8 ===== */
  11751. /* ===== Inline Function Start for 3.161. UKSUB16 ===== */
  11752. /**
  11753. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  11754. * \brief UKSUB16 (SIMD 16-bit Unsigned Saturating Subtraction)
  11755. * \details
  11756. * **Type**: SIMD
  11757. *
  11758. * **Syntax**:\n
  11759. * ~~~
  11760. * UKSUB16 Rd, Rs1, Rs2
  11761. * ~~~
  11762. *
  11763. * **Purpose**:\n
  11764. * Do 16-bit unsigned integer elements saturating subtractions simultaneously.
  11765. *
  11766. * **Description**:\n
  11767. * This instruction subtracts the 16-bit unsigned integer elements in Rs2 from the 16-bit
  11768. * unsigned integer elements in Rs1. If any of the results are beyond the 16-bit unsigned number
  11769. * range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set to 1. The saturated
  11770. * results are written to Rd.
  11771. *
  11772. * **Operations**:\n
  11773. * ~~~
  11774. * res[x] = Rs1.H[x] - Rs2.H[x];
  11775. * if (res[x] < 0) {
  11776. * res[x] = 0;
  11777. * OV = 1;
  11778. * }
  11779. * Rd.H[x] = res[x];
  11780. * for RV32: x=1...0,
  11781. * for RV64: x=3...0
  11782. * ~~~
  11783. *
  11784. * \param [in] a unsigned long type of value stored in a
  11785. * \param [in] b unsigned long type of value stored in b
  11786. * \return value stored in unsigned long type
  11787. */
  11788. __STATIC_FORCEINLINE unsigned long __RV_UKSUB16(unsigned long a, unsigned long b)
  11789. {
  11790. unsigned long result;
  11791. __ASM volatile("uksub16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  11792. return result;
  11793. }
  11794. /* ===== Inline Function End for 3.161. UKSUB16 ===== */
  11795. /* ===== Inline Function Start for 3.162. UKSUB64 ===== */
  11796. /**
  11797. * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
  11798. * \brief UKSUB64 (64-bit Unsigned Saturating Subtraction)
  11799. * \details
  11800. * **Type**: DSP (64-bit Profile)
  11801. *
  11802. * **Syntax**:\n
  11803. * ~~~
  11804. * UKSUB64 Rd, Rs1, Rs2
  11805. * ~~~
  11806. *
  11807. * **Purpose**:\n
   * Perform a 64-bit unsigned integer subtraction. The result is saturated to the U64 range.
  11809. *
  11810. * **RV32 Description**:\n
  11811. * This instruction subtracts the 64-bit unsigned integer of an even/odd pair of
  11812. * registers specified by Rs2(4,1) from the 64-bit unsigned integer of an even/odd pair of registers
  11813. * specified by Rs1(4,1). If the 64-bit result is beyond the U64 number range (0 <= U64 <= 2^64-1), it is
  11814. * saturated to the range and the OV bit is set to 1. The saturated result is then written to an even/odd
  11815. * pair of registers specified by Rd(4,1).
  11816. * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
  11817. * includes register 2d and 2d+1.
  11818. * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
  11819. * register of the pair contains the low 32-bit of the operand.
  11820. *
  11821. * **RV64 Description**:\n
  11822. * This instruction subtracts the 64-bit unsigned integer of Rs2 from the 64-bit
   * unsigned integer of Rs1. If the 64-bit result is beyond the U64 number range (0 <=
  11824. * U64 <= 2^64-1), it is saturated to the range and the OV bit is set to 1. The saturated result is then written
  11825. * to Rd.
  11826. *
  11827. * **Operations**:\n
  11828. * ~~~
  11829. * * RV32:
  11830. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  11831. * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
  11832. * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
  11833. * result = R[a_H].R[a_L] - R[b_H].R[b_L];
  11834. * if (result < 0) {
  11835. * result = 0; OV = 1;
  11836. * }
  11837. * R[t_H].R[t_L] = result;
  11838. * * RV64
  11839. * result = Rs1 - Rs2;
  11840. * if (result < 0) {
  11841. * result = 0; OV = 1;
  11842. * }
  11843. * Rd = result;
  11844. * ~~~
  11845. *
  11846. * \param [in] a unsigned long long type of value stored in a
  11847. * \param [in] b unsigned long long type of value stored in b
  11848. * \return value stored in unsigned long long type
  11849. */
  11850. __STATIC_FORCEINLINE unsigned long long __RV_UKSUB64(unsigned long long a, unsigned long long b)
  11851. {
  11852. unsigned long long result;
  11853. __ASM volatile("uksub64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  11854. return result;
  11855. }
  11856. /* ===== Inline Function End for 3.162. UKSUB64 ===== */
  11857. /* ===== Inline Function Start for 3.163. UKSUBH ===== */
  11858. /**
  11859. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
  11860. * \brief UKSUBH (Unsigned Subtraction with U16 Saturation)
  11861. * \details
  11862. * **Type**: DSP
  11863. *
  11864. * **Syntax**:\n
  11865. * ~~~
  11866. * UKSUBH Rd, Rs1, Rs2
  11867. * ~~~
  11868. *
  11869. * **Purpose**:\n
  11870. * Subtract the unsigned lower 32-bit content of two registers with U16 saturation.
  11871. *
  11872. * **Description**:\n
  11873. * The unsigned lower 32-bit content of Rs2 is subtracted from the unsigned lower 32-bit
  11874. * content of Rs1. And the result is saturated to the 16-bit unsigned integer range of [0, 2^16-1] and then
  11875. * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag.
  11876. *
  11877. * **Operations**:\n
  11878. * ~~~
  11879. * tmp = Rs1.W[0] - Rs2.W[0];
  11880. * if (tmp > (2^16)-1) {
  11881. * tmp = (2^16)-1;
  11882. * OV = 1;
  11883. * }
  11884. * else if (tmp < 0) {
  11885. * tmp = 0;
  11886. * OV = 1;
  11887. * }
  11888. * Rd = SE(tmp[15:0]);
  11889. * ~~~
  11890. *
  11891. * \param [in] a unsigned int type of value stored in a
  11892. * \param [in] b unsigned int type of value stored in b
  11893. * \return value stored in unsigned long type
  11894. */
  11895. __STATIC_FORCEINLINE unsigned long __RV_UKSUBH(unsigned int a, unsigned int b)
  11896. {
  11897. unsigned long result;
  11898. __ASM volatile("uksubh %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  11899. return result;
  11900. }
  11901. /* ===== Inline Function End for 3.163. UKSUBH ===== */
  11902. /* ===== Inline Function Start for 3.164. UKSUBW ===== */
  11903. /**
  11904. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
  11905. * \brief UKSUBW (Unsigned Subtraction with U32 Saturation)
  11906. * \details
  11907. * **Type**: DSP
  11908. *
  11909. * **Syntax**:\n
  11910. * ~~~
  11911. * UKSUBW Rd, Rs1, Rs2
  11912. * ~~~
  11913. *
  11914. * **Purpose**:\n
  11915. * Subtract the unsigned lower 32-bit content of two registers with unsigned 32-bit
  11916. * saturation.
  11917. *
  11918. * **Description**:\n
  11919. * The unsigned lower 32-bit content of Rs2 is subtracted from the unsigned lower 32-bit
  11920. * content of Rs1. And the result is saturated to the 32-bit unsigned integer range of [0, 2^32-1] and then
  11921. * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag.
  11922. *
  11923. * **Operations**:\n
  11924. * ~~~
  11925. * tmp = Rs1.W[0] - Rs2.W[0];
  11926. * if (tmp < 0) {
  11927. * tmp[31:0] = 0;
  11928. * OV = 1;
  11929. * }
  11930. * Rd = tmp[31:0]; // RV32
  11931. * Rd = SE(tmp[31:0]); // RV64
  11932. * ~~~
  11933. *
  11934. * \param [in] a unsigned int type of value stored in a
  11935. * \param [in] b unsigned int type of value stored in b
  11936. * \return value stored in unsigned long type
  11937. */
  11938. __STATIC_FORCEINLINE unsigned long __RV_UKSUBW(unsigned int a, unsigned int b)
  11939. {
  11940. unsigned long result;
  11941. __ASM volatile("uksubw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  11942. return result;
  11943. }
  11944. /* ===== Inline Function End for 3.164. UKSUBW ===== */
  11945. /* ===== Inline Function Start for 3.165. UMAR64 ===== */
  11946. /**
  11947. * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
  11948. * \brief UMAR64 (Unsigned Multiply and Add to 64-Bit Data)
  11949. * \details
  11950. * **Type**: DSP (64-bit Profile)
  11951. *
  11952. * **Syntax**:\n
  11953. * ~~~
  11954. * UMAR64 Rd, Rs1, Rs2
  11955. * ~~~
  11956. *
  11957. * **Purpose**:\n
  11958. * Multiply the 32-bit unsigned elements in two registers and add the 64-bit multiplication
  11959. * results to the 64-bit unsigned data of a pair of registers (RV32) or a register (RV64). The result is
  11960. * written back to the pair of registers (RV32) or a register (RV64).
  11961. *
  11962. * **RV32 Description**:\n
  11963. * This instruction multiplies the 32-bit unsigned data of Rs1 with that of Rs2. It
  11964. * adds the 64-bit multiplication result to the 64-bit unsigned data of an even/odd pair of registers
  11965. * specified by Rd(4,1). The addition result is written back to the even/odd pair of registers specified by
  11966. * Rd(4,1).
  11967. * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
  11968. * includes register 2d and 2d+1.
  11969. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
  11970. * of the pair contains the low 32-bit of the result.
  11971. *
  11972. * **RV64 Description**:\n
  11973. * This instruction multiplies the 32-bit unsigned elements of Rs1 with that of Rs2.
  11974. * It adds the 64-bit multiplication results to the 64-bit unsigned data of Rd. The addition result is
  11975. * written back to Rd.
  11976. *
  11977. * **Operations**:\n
  11978. * ~~~
  11979. * * RV32:
  11980. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
   * R[t_H].R[t_L] = R[t_H].R[t_L] + (Rs1 u* Rs2);
  11982. * * RV64:
  11983. * Rd = Rd + (Rs1.W[0] u* Rs2.W[0]) + (Rs1.W[1] u* Rs2.W[1]);
  11984. * ~~~
  11985. *
  11986. * \param [in] t unsigned long long type of value stored in t
  11987. * \param [in] a unsigned long type of value stored in a
  11988. * \param [in] b unsigned long type of value stored in b
  11989. * \return value stored in unsigned long long type
  11990. */
  11991. __STATIC_FORCEINLINE unsigned long long __RV_UMAR64(unsigned long long t, unsigned long a, unsigned long b)
  11992. {
  11993. __ASM volatile("umar64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  11994. return t;
  11995. }
  11996. /* ===== Inline Function End for 3.165. UMAR64 ===== */
  11997. /* ===== Inline Function Start for 3.166. UMAQA ===== */
  11998. /**
  11999. * \ingroup NMSIS_Core_DSP_Intrinsic_8B_MULT_32B_ADD
   * \brief UMAQA (Unsigned Multiply Four Bytes with 32-bit Adds)
  12001. * \details
  12002. * **Type**: DSP
  12003. *
  12004. * **Syntax**:\n
  12005. * ~~~
  12006. * UMAQA Rd, Rs1, Rs2
  12007. * ~~~
  12008. *
  12009. * **Purpose**:\n
  12010. * Do four unsigned 8-bit multiplications from 32-bit chunks of two registers; and then adds
  12011. * the four 16-bit results and the content of corresponding 32-bit chunks of a third register together.
  12012. *
  12013. * **Description**:\n
  12014. * This instruction multiplies the four unsigned 8-bit elements of 32-bit chunks of Rs1 with the four
  12015. * unsigned 8-bit elements of 32-bit chunks of Rs2 and then adds the four results together with the
  12016. * unsigned content of the corresponding 32-bit chunks of Rd. The final results are written back to the
  12017. * corresponding 32-bit chunks in Rd.
  12018. *
  12019. * **Operations**:\n
  12020. * ~~~
  12021. * res[x] = Rd.W[x] + (Rs1.W[x].B[3] u* Rs2.W[x].B[3]) +
  12022. * (Rs1.W[x].B[2] u* Rs2.W[x].B[2]) + (Rs1.W[x].B[1] u* Rs2.W[x].B[1]) +
  12023. * (Rs1.W[x].B[0] u* Rs2.W[x].B[0]);
  12024. * Rd.W[x] = res[x];
  12025. * for RV32: x=0,
  12026. * for RV64: x=1...0
  12027. * ~~~
  12028. *
  12029. * \param [in] t unsigned long type of value stored in t
  12030. * \param [in] a unsigned long type of value stored in a
  12031. * \param [in] b unsigned long type of value stored in b
  12032. * \return value stored in unsigned long type
  12033. */
  12034. __STATIC_FORCEINLINE unsigned long __RV_UMAQA(unsigned long t, unsigned long a, unsigned long b)
  12035. {
  12036. __ASM volatile("umaqa %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  12037. return t;
  12038. }
  12039. /* ===== Inline Function End for 3.166. UMAQA ===== */
  12040. /* ===== Inline Function Start for 3.167. UMAX8 ===== */
  12041. /**
  12042. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
  12043. * \brief UMAX8 (SIMD 8-bit Unsigned Maximum)
  12044. * \details
  12045. * **Type**: SIMD
  12046. *
  12047. * **Syntax**:\n
  12048. * ~~~
  12049. * UMAX8 Rd, Rs1, Rs2
  12050. * ~~~
  12051. *
  12052. * **Purpose**:\n
  12053. * Do 8-bit unsigned integer elements finding maximum operations simultaneously.
  12054. *
  12055. * **Description**:\n
   * This instruction compares the 8-bit unsigned integer elements in Rs1 with the 8-bit
   * unsigned integer elements in Rs2 and selects, for each pair, the element that is greater. The
   * selected results are written to Rd.
  12059. *
  12060. * **Operations**:\n
  12061. * ~~~
  12062. * Rd.B[x] = (Rs1.B[x] >u Rs2.B[x])? Rs1.B[x] : Rs2.B[x];
  12063. * for RV32: x=3...0,
  12064. * for RV64: x=7...0
  12065. * ~~~
  12066. *
  12067. * \param [in] a unsigned long type of value stored in a
  12068. * \param [in] b unsigned long type of value stored in b
  12069. * \return value stored in unsigned long type
  12070. */
  12071. __STATIC_FORCEINLINE unsigned long __RV_UMAX8(unsigned long a, unsigned long b)
  12072. {
  12073. unsigned long result;
  12074. __ASM volatile("umax8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  12075. return result;
  12076. }
  12077. /* ===== Inline Function End for 3.167. UMAX8 ===== */
  12078. /* ===== Inline Function Start for 3.168. UMAX16 ===== */
  12079. /**
  12080. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
  12081. * \brief UMAX16 (SIMD 16-bit Unsigned Maximum)
  12082. * \details
  12083. * **Type**: SIMD
  12084. *
  12085. * **Syntax**:\n
  12086. * ~~~
  12087. * UMAX16 Rd, Rs1, Rs2
  12088. * ~~~
  12089. *
  12090. * **Purpose**:\n
  12091. * Do 16-bit unsigned integer elements finding maximum operations simultaneously.
  12092. *
  12093. * **Description**:\n
  12094. * This instruction compares the 16-bit unsigned integer elements in Rs1 with the 16-bit
   * unsigned integer elements in Rs2 and selects, for each pair, the element that is greater. The
  12096. * selected results are written to Rd.
  12097. *
  12098. * **Operations**:\n
  12099. * ~~~
  12100. * Rd.H[x] = (Rs1.H[x] >u Rs2.H[x])? Rs1.H[x] : Rs2.H[x];
  12101. * for RV32: x=1...0,
  12102. * for RV64: x=3...0
  12103. * ~~~
  12104. *
  12105. * \param [in] a unsigned long type of value stored in a
  12106. * \param [in] b unsigned long type of value stored in b
  12107. * \return value stored in unsigned long type
  12108. */
  12109. __STATIC_FORCEINLINE unsigned long __RV_UMAX16(unsigned long a, unsigned long b)
  12110. {
  12111. unsigned long result;
  12112. __ASM volatile("umax16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  12113. return result;
  12114. }
  12115. /* ===== Inline Function End for 3.168. UMAX16 ===== */
  12116. /* ===== Inline Function Start for 3.169. UMIN8 ===== */
  12117. /**
  12118. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
  12119. * \brief UMIN8 (SIMD 8-bit Unsigned Minimum)
  12120. * \details
  12121. * **Type**: SIMD
  12122. *
  12123. * **Syntax**:\n
  12124. * ~~~
  12125. * UMIN8 Rd, Rs1, Rs2
  12126. * ~~~
  12127. *
  12128. * **Purpose**:\n
  12129. * Do 8-bit unsigned integer elements finding minimum operations simultaneously.
  12130. *
  12131. * **Description**:\n
  12132. * This instruction compares the 8-bit unsigned integer elements in Rs1 with the 8-bit
   * unsigned integer elements in Rs2 and selects, for each pair, the element that is smaller. The
  12134. * selected results are written to Rd.
  12135. *
  12136. * **Operations**:\n
  12137. * ~~~
  12138. * Rd.B[x] = (Rs1.B[x] <u Rs2.B[x])? Rs1.B[x] : Rs2.B[x];
  12139. * for RV32: x=3...0,
  12140. * for RV64: x=7...0
  12141. * ~~~
  12142. *
  12143. * \param [in] a unsigned long type of value stored in a
  12144. * \param [in] b unsigned long type of value stored in b
  12145. * \return value stored in unsigned long type
  12146. */
  12147. __STATIC_FORCEINLINE unsigned long __RV_UMIN8(unsigned long a, unsigned long b)
  12148. {
  12149. unsigned long result;
  12150. __ASM volatile("umin8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  12151. return result;
  12152. }
  12153. /* ===== Inline Function End for 3.169. UMIN8 ===== */
  12154. /* ===== Inline Function Start for 3.170. UMIN16 ===== */
  12155. /**
  12156. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
  12157. * \brief UMIN16 (SIMD 16-bit Unsigned Minimum)
  12158. * \details
  12159. * **Type**: SIMD
  12160. *
  12161. * **Syntax**:\n
  12162. * ~~~
  12163. * UMIN16 Rd, Rs1, Rs2
  12164. * ~~~
  12165. *
  12166. * **Purpose**:\n
  12167. * Do 16-bit unsigned integer elements finding minimum operations simultaneously.
  12168. *
  12169. * **Description**:\n
  12170. * This instruction compares the 16-bit unsigned integer elements in Rs1 with the 16-bit
   * unsigned integer elements in Rs2 and selects, for each pair, the element that is smaller. The
  12172. * selected results are written to Rd.
  12173. *
  12174. * **Operations**:\n
  12175. * ~~~
  12176. * Rd.H[x] = (Rs1.H[x] <u Rs2.H[x])? Rs1.H[x] : Rs2.H[x];
  12177. * for RV32: x=1...0,
  12178. * for RV64: x=3...0
  12179. * ~~~
  12180. *
  12181. * \param [in] a unsigned long type of value stored in a
  12182. * \param [in] b unsigned long type of value stored in b
  12183. * \return value stored in unsigned long type
  12184. */
  12185. __STATIC_FORCEINLINE unsigned long __RV_UMIN16(unsigned long a, unsigned long b)
  12186. {
  12187. unsigned long result;
  12188. __ASM volatile("umin16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  12189. return result;
  12190. }
  12191. /* ===== Inline Function End for 3.170. UMIN16 ===== */
  12192. /* ===== Inline Function Start for 3.171. UMSR64 ===== */
  12193. /**
  12194. * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
  12195. * \brief UMSR64 (Unsigned Multiply and Subtract from 64-Bit Data)
  12196. * \details
  12197. * **Type**: DSP (64-bit Profile)
  12198. *
  12199. * **Syntax**:\n
  12200. * ~~~
  12201. * UMSR64 Rd, Rs1, Rs2
  12202. * ~~~
  12203. *
  12204. * **Purpose**:\n
  12205. * Multiply the 32-bit unsigned elements in two registers and subtract the 64-bit
  12206. * multiplication results from the 64-bit unsigned data of a pair of registers (RV32) or a register (RV64).
  12207. * The result is written back to the pair of registers (RV32) or a register (RV64).
  12208. *
  12209. * **RV32 Description**:\n
  12210. * This instruction multiplies the 32-bit unsigned data of Rs1 with that of Rs2. It
  12211. * subtracts the 64-bit multiplication result from the 64-bit unsigned data of an even/odd pair of
  12212. * registers specified by Rd(4,1). The subtraction result is written back to the even/odd pair of registers
  12213. * specified by Rd(4,1).
  12214. * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
  12215. * includes register 2d and 2d+1.
  12216. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
  12217. * of the pair contains the low 32-bit of the result.
  12218. *
  12219. * **RV64 Description**:\n
  12220. * This instruction multiplies the 32-bit unsigned elements of Rs1 with that of Rs2.
  12221. * It subtracts the 64-bit multiplication results from the 64-bit unsigned data of Rd. The subtraction
  12222. * result is written back to Rd.
  12223. *
  12224. * **Operations**:\n
  12225. * ~~~
  12226. * * RV32:
  12227. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
   * R[t_H].R[t_L] = R[t_H].R[t_L] - (Rs1 u* Rs2);
  12229. * * RV64:
  12230. * Rd = Rd - (Rs1.W[0] u* Rs2.W[0]) - (Rs1.W[1] u* Rs2.W[1]);
  12231. * ~~~
  12232. *
  12233. * \param [in] t unsigned long long type of value stored in t
  12234. * \param [in] a unsigned long type of value stored in a
  12235. * \param [in] b unsigned long type of value stored in b
  12236. * \return value stored in unsigned long long type
  12237. */
  12238. __STATIC_FORCEINLINE unsigned long long __RV_UMSR64(unsigned long long t, unsigned long a, unsigned long b)
  12239. {
  12240. __ASM volatile("umsr64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  12241. return t;
  12242. }
  12243. /* ===== Inline Function End for 3.171. UMSR64 ===== */
  12244. /* ===== Inline Function Start for 3.172.1. UMUL8 ===== */
  12245. /**
  12246. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY
  12247. * \brief UMUL8 (SIMD Unsigned 8-bit Multiply)
  12248. * \details
  12249. * **Type**: SIMD
  12250. *
  12251. * **Syntax**:\n
  12252. * ~~~
  12253. * UMUL8 Rd, Rs1, Rs2
  12254. * UMULX8 Rd, Rs1, Rs2
  12255. * ~~~
  12256. *
  12257. * **Purpose**:\n
  12258. * Do unsigned 8-bit multiplications and generate four 16-bit results simultaneously.
  12259. *
  12260. * **RV32 Description**:\n
  12261. * For the `UMUL8` instruction, multiply the unsigned 8-bit data elements of Rs1
  12262. * with the corresponding unsigned 8-bit data elements of Rs2.
  12263. * For the `UMULX8` instruction, multiply the first and second unsigned 8-bit data elements of Rs1
  12264. * with the second and first unsigned 8-bit data elements of Rs2. At the same time, multiply the third
  12265. * and fourth unsigned 8-bit data elements of Rs1 with the fourth and third unsigned 8-bit data
  12266. * elements of Rs2.
  12267. * The four 16-bit results are then written into an even/odd pair of registers specified by Rd(4,1).
  12268. * Rd(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
  12269. * includes register 2d and 2d+1.
  12270. * The odd `2d+1` register of the pair contains the two 16-bit results calculated from the top part of
  12271. * Rs1 and the even `2d` register of the pair contains the two 16-bit results calculated from the bottom
  12272. * part of Rs1.
  12273. *
  12274. * **RV64 Description**:\n
  12275. * For the `UMUL8` instruction, multiply the unsigned 8-bit data elements of Rs1
  12276. * with the corresponding unsigned 8-bit data elements of Rs2.
  12277. * For the `UMULX8` instruction, multiply the first and second unsigned 8-bit data elements of Rs1
  12278. * with the second and first unsigned 8-bit data elements of Rs2. At the same time, multiply the third
  12279. * and fourth unsigned 8-bit data elements of Rs1 with the fourth and third unsigned 8-bit data
  12280. * elements of Rs2.
  12281. * The four 16-bit results are then written into Rd. The Rd.W[1] contains the two 16-bit results
  12282. * calculated from the top part of Rs1 and the Rd.W[0] contains the two 16-bit results calculated from
  12283. * the bottom part of Rs1.
  12284. *
  12285. * **Operations**:\n
  12286. * ~~~
  12287. * * RV32:
  12288. * if (is `UMUL8`) {
  12289. * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
  12290. * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
  12291. * } else if (is `UMULX8`) {
  12292. * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
  12293. * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
  12294. * }
  12295. * rest[x/2] = op1t[x/2] u* op2t[x/2];
  12296. * resb[x/2] = op1b[x/2] u* op2b[x/2];
  12297. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  12298. * R[t_H].H[1] = rest[1]; R[t_H].H[0] = resb[1];
  12299. * R[t_L].H[1] = rest[0]; R[t_L].H[0] = resb[0];
  12300. * x = 0 and 2
  12301. * * RV64:
  12302. * if (is `UMUL8`) {
  12303. * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
  12304. * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
  12305. * } else if (is `UMULX8`) {
  12306. * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
  12307. * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
  12308. * }
  12309. * rest[x/2] = op1t[x/2] u* op2t[x/2];
  12310. * resb[x/2] = op1b[x/2] u* op2b[x/2];
  12311. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  12312. * Rd.W[1].H[1] = rest[1]; Rd.W[1].H[0] = resb[1];
  12313. * Rd.W[0].H[1] = rest[0]; Rd.W[0].H[0] = resb[0]; x = 0 and 2
  12314. * ~~~
  12315. *
  12316. * \param [in] a unsigned int type of value stored in a
  12317. * \param [in] b unsigned int type of value stored in b
  12318. * \return value stored in unsigned long long type
  12319. */
  12320. __STATIC_FORCEINLINE unsigned long long __RV_UMUL8(unsigned int a, unsigned int b)
  12321. {
  12322. unsigned long long result;
  12323. __ASM volatile("umul8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  12324. return result;
  12325. }
  12326. /* ===== Inline Function End for 3.172.1. UMUL8 ===== */
  12327. /* ===== Inline Function Start for 3.172.2. UMULX8 ===== */
  12328. /**
  12329. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY
  12330. * \brief UMULX8 (SIMD Unsigned Crossed 8-bit Multiply)
  12331. * \details
  12332. * **Type**: SIMD
  12333. *
  12334. * **Syntax**:\n
  12335. * ~~~
  12336. * UMUL8 Rd, Rs1, Rs2
  12337. * UMULX8 Rd, Rs1, Rs2
  12338. * ~~~
  12339. *
  12340. * **Purpose**:\n
  12341. * Do unsigned 8-bit multiplications and generate four 16-bit results simultaneously.
  12342. *
  12343. * **RV32 Description**:\n
  12344. * For the `UMUL8` instruction, multiply the unsigned 8-bit data elements of Rs1
  12345. * with the corresponding unsigned 8-bit data elements of Rs2.
  12346. * For the `UMULX8` instruction, multiply the first and second unsigned 8-bit data elements of Rs1
  12347. * with the second and first unsigned 8-bit data elements of Rs2. At the same time, multiply the third
  12348. * and fourth unsigned 8-bit data elements of Rs1 with the fourth and third unsigned 8-bit data
  12349. * elements of Rs2.
  12350. * The four 16-bit results are then written into an even/odd pair of registers specified by Rd(4,1).
  12351. * Rd(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
  12352. * includes register 2d and 2d+1.
  12353. * The odd `2d+1` register of the pair contains the two 16-bit results calculated from the top part of
  12354. * Rs1 and the even `2d` register of the pair contains the two 16-bit results calculated from the bottom
  12355. * part of Rs1.
  12356. *
  12357. * **RV64 Description**:\n
  12358. * For the `UMUL8` instruction, multiply the unsigned 8-bit data elements of Rs1
  12359. * with the corresponding unsigned 8-bit data elements of Rs2.
  12360. * For the `UMULX8` instruction, multiply the first and second unsigned 8-bit data elements of Rs1
  12361. * with the second and first unsigned 8-bit data elements of Rs2. At the same time, multiply the third
  12362. * and fourth unsigned 8-bit data elements of Rs1 with the fourth and third unsigned 8-bit data
  12363. * elements of Rs2.
  12364. * The four 16-bit results are then written into Rd. The Rd.W[1] contains the two 16-bit results
  12365. * calculated from the top part of Rs1 and the Rd.W[0] contains the two 16-bit results calculated from
  12366. * the bottom part of Rs1.
  12367. *
  12368. * **Operations**:\n
  12369. * ~~~
  12370. * * RV32:
  12371. * if (is `UMUL8`) {
  12372. * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
  12373. * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
  12374. * } else if (is `UMULX8`) {
  12375. * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
  12376. * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
  12377. * }
  12378. * rest[x/2] = op1t[x/2] u* op2t[x/2];
  12379. * resb[x/2] = op1b[x/2] u* op2b[x/2];
  12380. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  12381. * R[t_H].H[1] = rest[1]; R[t_H].H[0] = resb[1];
  12382. * R[t_L].H[1] = rest[0]; R[t_L].H[0] = resb[0];
  12383. * x = 0 and 2
  12384. * * RV64:
  12385. * if (is `UMUL8`) {
  12386. * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
  12387. * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
  12388. * } else if (is `UMULX8`) {
  12389. * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
  12390. * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
  12391. * }
  12392. * rest[x/2] = op1t[x/2] u* op2t[x/2];
  12393. * resb[x/2] = op1b[x/2] u* op2b[x/2];
  12394. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  12395. * Rd.W[1].H[1] = rest[1]; Rd.W[1].H[0] = resb[1];
  12396. * Rd.W[0].H[1] = rest[0]; Rd.W[0].H[0] = resb[0]; x = 0 and 2
  12397. * ~~~
  12398. *
  12399. * \param [in] a unsigned int type of value stored in a
  12400. * \param [in] b unsigned int type of value stored in b
  12401. * \return value stored in unsigned long long type
  12402. */
  12403. __STATIC_FORCEINLINE unsigned long long __RV_UMULX8(unsigned int a, unsigned int b)
  12404. {
  12405. unsigned long long result;
  12406. __ASM volatile("umulx8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  12407. return result;
  12408. }
  12409. /* ===== Inline Function End for 3.172.2. UMULX8 ===== */
  12410. /* ===== Inline Function Start for 3.173.1. UMUL16 ===== */
  12411. /**
  12412. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY
  12413. * \brief UMUL16 (SIMD Unsigned 16-bit Multiply)
  12414. * \details
  12415. * **Type**: SIMD
  12416. *
  12417. * **Syntax**:\n
  12418. * ~~~
  12419. * UMUL16 Rd, Rs1, Rs2
  12420. * UMULX16 Rd, Rs1, Rs2
  12421. * ~~~
  12422. *
  12423. * **Purpose**:\n
  12424. * Do unsigned 16-bit multiplications and generate two 32-bit results simultaneously.
  12425. *
  12426. * **RV32 Description**:\n
  12427. * For the `UMUL16` instruction, multiply the top 16-bit U16 content of Rs1 with
  12428. * the top 16-bit U16 content of Rs2. At the same time, multiply the bottom 16-bit U16 content of Rs1
  12429. * with the bottom 16-bit U16 content of Rs2.
  12430. * For the `UMULX16` instruction, multiply the top 16-bit U16 content of Rs1 with the bottom 16-bit
  12431. * U16 content of Rs2. At the same time, multiply the bottom 16-bit U16 content of Rs1 with the top
  12432. * 16-bit U16 content of Rs2.
  12433. * The two U32 results are then written into an even/odd pair of registers specified by Rd(4,1). Rd(4,1),
  12434. * i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair includes
  12435. * register 2d and 2d+1.
  12436. * The odd `2d+1` register of the pair contains the 32-bit result calculated from the top part of Rs1 and
  12437. * the even `2d` register of the pair contains the 32-bit result calculated from the bottom part of Rs1.
  12438. *
  12439. * **RV64 Description**:\n
  12440. * For the `UMUL16` instruction, multiply the top 16-bit U16 content of the lower
  12441. * 32-bit word in Rs1 with the top 16-bit U16 content of the lower 32-bit word in Rs2. At the same time,
  12442. * multiply the bottom 16-bit U16 content of the lower 32-bit word in Rs1 with the bottom 16-bit U16
  12443. * content of the lower 32-bit word in Rs2.
  12444. * For the `UMULX16` instruction, multiply the top 16-bit U16 content of the lower 32-bit word in Rs1
  12445. * with the bottom 16-bit U16 content of the lower 32-bit word in Rs2. At the same time, multiply the
  12446. * bottom 16-bit U16 content of the lower 32-bit word in Rs1 with the top 16-bit U16 content of the
  12447. * lower 32-bit word in Rs2.
  12448. * The two 32-bit U32 results are then written into Rd. The result calculated from the top 16-bit of the
  12449. * lower 32-bit word in Rs1 is written to Rd.W[1]. And the result calculated from the bottom 16-bit of
  12450. * the lower 32-bit word in Rs1 is written to Rd.W[0]
  12451. *
  12452. * **Operations**:\n
  12453. * ~~~
  12454. * * RV32:
  12455. * if (is `UMUL16`) {
  12456. * op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
  12457. * op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
  12458. * } else if (is `UMULX16`) {
  12459. * op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
  12460. * op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
  12461. * }
  12462. * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  12463. * res = aop u* bop;
  12464. * }
  12465. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  12466. * R[t_H] = rest;
  12467. * R[t_L] = resb;
  12468. * * RV64:
  12469. * if (is `UMUL16`) {
  12470. * op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
  12471. * op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
  12472. * } else if (is `UMULX16`) {
  12473. * op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
  12474. * op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
  12475. * }
  12476. * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  12477. * res = aop u* bop;
  12478. * }
  12479. * Rd.W[1] = rest;
  12480. * Rd.W[0] = resb;
  12481. * ~~~
  12482. *
  12483. * \param [in] a unsigned int type of value stored in a
  12484. * \param [in] b unsigned int type of value stored in b
  12485. * \return value stored in unsigned long long type
  12486. */
  12487. __STATIC_FORCEINLINE unsigned long long __RV_UMUL16(unsigned int a, unsigned int b)
  12488. {
  12489. unsigned long long result;
  12490. __ASM volatile("umul16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  12491. return result;
  12492. }
  12493. /* ===== Inline Function End for 3.173.1. UMUL16 ===== */
  12494. /* ===== Inline Function Start for 3.173.2. UMULX16 ===== */
  12495. /**
  12496. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY
  12497. * \brief UMULX16 (SIMD Unsigned Crossed 16-bit Multiply)
  12498. * \details
  12499. * **Type**: SIMD
  12500. *
  12501. * **Syntax**:\n
  12502. * ~~~
  12503. * UMUL16 Rd, Rs1, Rs2
  12504. * UMULX16 Rd, Rs1, Rs2
  12505. * ~~~
  12506. *
  12507. * **Purpose**:\n
  12508. * Do unsigned 16-bit multiplications and generate two 32-bit results simultaneously.
  12509. *
  12510. * **RV32 Description**:\n
  12511. * For the `UMUL16` instruction, multiply the top 16-bit U16 content of Rs1 with
  12512. * the top 16-bit U16 content of Rs2. At the same time, multiply the bottom 16-bit U16 content of Rs1
  12513. * with the bottom 16-bit U16 content of Rs2.
  12514. * For the `UMULX16` instruction, multiply the top 16-bit U16 content of Rs1 with the bottom 16-bit
  12515. * U16 content of Rs2. At the same time, multiply the bottom 16-bit U16 content of Rs1 with the top
  12516. * 16-bit U16 content of Rs2.
  12517. * The two U32 results are then written into an even/odd pair of registers specified by Rd(4,1). Rd(4,1),
  12518. * i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair includes
  12519. * register 2d and 2d+1.
  12520. * The odd `2d+1` register of the pair contains the 32-bit result calculated from the top part of Rs1 and
  12521. * the even `2d` register of the pair contains the 32-bit result calculated from the bottom part of Rs1.
  12522. *
  12523. * **RV64 Description**:\n
  12524. * For the `UMUL16` instruction, multiply the top 16-bit U16 content of the lower
  12525. * 32-bit word in Rs1 with the top 16-bit U16 content of the lower 32-bit word in Rs2. At the same time,
  12526. * multiply the bottom 16-bit U16 content of the lower 32-bit word in Rs1 with the bottom 16-bit U16
  12527. * content of the lower 32-bit word in Rs2.
  12528. * For the `UMULX16` instruction, multiply the top 16-bit U16 content of the lower 32-bit word in Rs1
  12529. * with the bottom 16-bit U16 content of the lower 32-bit word in Rs2. At the same time, multiply the
  12530. * bottom 16-bit U16 content of the lower 32-bit word in Rs1 with the top 16-bit U16 content of the
  12531. * lower 32-bit word in Rs2.
  12532. * The two 32-bit U32 results are then written into Rd. The result calculated from the top 16-bit of the
  12533. * lower 32-bit word in Rs1 is written to Rd.W[1]. And the result calculated from the bottom 16-bit of
  12534. * the lower 32-bit word in Rs1 is written to Rd.W[0]
  12535. *
  12536. * **Operations**:\n
  12537. * ~~~
  12538. * * RV32:
  12539. * if (is `UMUL16`) {
  12540. * op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
  12541. * op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
  12542. * } else if (is `UMULX16`) {
  12543. * op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
  12544. * op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
  12545. * }
  12546. * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  12547. * res = aop u* bop;
  12548. * }
  12549. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  12550. * R[t_H] = rest;
  12551. * R[t_L] = resb;
  12552. * * RV64:
  12553. * if (is `UMUL16`) {
  12554. * op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
  12555. * op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
  12556. * } else if (is `UMULX16`) {
  12557. * op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
  12558. * op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
  12559. * }
  12560. * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  12561. * res = aop u* bop;
  12562. * }
  12563. * Rd.W[1] = rest;
  12564. * Rd.W[0] = resb;
  12565. * ~~~
  12566. *
  12567. * \param [in] a unsigned int type of value stored in a
  12568. * \param [in] b unsigned int type of value stored in b
  12569. * \return value stored in unsigned long long type
  12570. */
  12571. __STATIC_FORCEINLINE unsigned long long __RV_UMULX16(unsigned int a, unsigned int b)
  12572. {
  12573. unsigned long long result;
  12574. __ASM volatile("umulx16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  12575. return result;
  12576. }
  12577. /* ===== Inline Function End for 3.173.2. UMULX16 ===== */
  12578. /* ===== Inline Function Start for 3.174. URADD8 ===== */
  12579. /**
  12580. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
  12581. * \brief URADD8 (SIMD 8-bit Unsigned Halving Addition)
  12582. * \details
  12583. * **Type**: SIMD
  12584. *
  12585. * **Syntax**:\n
  12586. * ~~~
  12587. * URADD8 Rd, Rs1, Rs2
  12588. * ~~~
  12589. *
  12590. * **Purpose**:\n
  12591. * Do 8-bit unsigned integer element additions simultaneously. The results are halved to
  12592. * avoid overflow or saturation.
  12593. *
  12594. * **Description**:\n
  12595. * This instruction adds the 8-bit unsigned integer elements in Rs1 with the 8-bit
  12596. * unsigned integer elements in Rs2. The results are first logically right-shifted by 1 bit and then
  12597. * written to Rd.
  12598. *
  12599. * **Examples**:\n
  12600. * ~~~
  12601. * * Ra = 0x7F, Rb = 0x7F, Rt = 0x7F
  12602. * * Ra = 0x80, Rb = 0x80, Rt = 0x80
  12603. * * Ra = 0x40, Rb = 0x80, Rt = 0x60
  12604. * ~~~
  12605. *
  12606. * **Operations**:\n
  12607. * ~~~
  12608. * Rd.B[x] = (Rs1.B[x] + Rs2.B[x]) u>> 1;
  12609. * for RV32: x=3...0,
  12610. * for RV64: x=7...0
  12611. * ~~~
  12612. *
  12613. * \param [in] a unsigned long type of value stored in a
  12614. * \param [in] b unsigned long type of value stored in b
  12615. * \return value stored in unsigned long type
  12616. */
  12617. __STATIC_FORCEINLINE unsigned long __RV_URADD8(unsigned long a, unsigned long b)
  12618. {
  12619. unsigned long result;
  12620. __ASM volatile("uradd8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  12621. return result;
  12622. }
  12623. /* ===== Inline Function End for 3.174. URADD8 ===== */
  12624. /* ===== Inline Function Start for 3.175. URADD16 ===== */
  12625. /**
  12626. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  12627. * \brief URADD16 (SIMD 16-bit Unsigned Halving Addition)
  12628. * \details
  12629. * **Type**: SIMD
  12630. *
  12631. * **Syntax**:\n
  12632. * ~~~
  12633. * URADD16 Rd, Rs1, Rs2
  12634. * ~~~
  12635. *
  12636. * **Purpose**:\n
  12637. * Do 16-bit unsigned integer element additions simultaneously. The results are halved to
  12638. * avoid overflow or saturation.
  12639. *
  12640. * **Description**:\n
  12641. * This instruction adds the 16-bit unsigned integer elements in Rs1 with the 16-bit
  12642. * unsigned integer elements in Rs2. The results are first logically right-shifted by 1 bit and then
  12643. * written to Rd.
  12644. *
  12645. * **Examples**:\n
  12646. * ~~~
  12647. * * Ra = 0x7FFF, Rb = 0x7FFF Rt = 0x7FFF
  12648. * * Ra = 0x8000, Rb = 0x8000 Rt = 0x8000
  12649. * * Ra = 0x4000, Rb = 0x8000 Rt = 0x6000
  12650. * ~~~
  12651. *
  12652. * **Operations**:\n
  12653. * ~~~
  12654. * Rd.H[x] = (Rs1.H[x] + Rs2.H[x]) u>> 1;
  12655. * for RV32: x=1...0,
  12656. * for RV64: x=3...0
  12657. * ~~~
  12658. *
  12659. * \param [in] a unsigned long type of value stored in a
  12660. * \param [in] b unsigned long type of value stored in b
  12661. * \return value stored in unsigned long type
  12662. */
  12663. __STATIC_FORCEINLINE unsigned long __RV_URADD16(unsigned long a, unsigned long b)
  12664. {
  12665. unsigned long result;
  12666. __ASM volatile("uradd16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  12667. return result;
  12668. }
  12669. /* ===== Inline Function End for 3.175. URADD16 ===== */
  12670. /* ===== Inline Function Start for 3.176. URADD64 ===== */
  12671. /**
  12672. * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
  12673. * \brief URADD64 (64-bit Unsigned Halving Addition)
  12674. * \details
  12675. * **Type**: DSP (64-bit Profile)
  12676. *
  12677. * **Syntax**:\n
  12678. * ~~~
  12679. * URADD64 Rd, Rs1, Rs2
  12680. * ~~~
  12681. *
  12682. * **Purpose**:\n
  12683. * Add two 64-bit unsigned integers. The result is halved to avoid overflow or saturation.
  12684. *
  12685. * **RV32 Description**:\n
  12686. * This instruction adds the 64-bit unsigned integer of an even/odd pair of registers
  12687. * specified by Rs1(4,1) with the 64-bit unsigned integer of an even/odd pair of registers specified by
  12688. * Rs2(4,1). The 64-bit addition result is first logically right-shifted by 1 bit and then written to an
  12689. * even/odd pair of registers specified by Rd(4,1).
  12690. * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
  12691. * includes register 2d and 2d+1.
  12692. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
  12693. * of the pair contains the low 32-bit of the result.
  12694. *
  12695. * **RV64 Description**:\n
  12696. * This instruction adds the 64-bit unsigned integer in Rs1 with the 64-bit unsigned
  12697. * integer Rs2. The 64-bit addition result is first logically right-shifted by 1 bit and then written to Rd.
  12698. *
  12699. * **Operations**:\n
  12700. * ~~~
  12701. * * RV32:
  12702. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  12703. * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
  12704. * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
  12705. * R[t_H].R[t_L] = (R[a_H].R[a_L] + R[b_H].R[b_L]) u>> 1;
  12706. * * RV64:
  12707. * Rd = (Rs1 + Rs2) u>> 1;
  12708. * ~~~
  12709. *
  12710. * \param [in] a unsigned long long type of value stored in a
  12711. * \param [in] b unsigned long long type of value stored in b
  12712. * \return value stored in unsigned long long type
  12713. */
  12714. __STATIC_FORCEINLINE unsigned long long __RV_URADD64(unsigned long long a, unsigned long long b)
  12715. {
  12716. unsigned long long result;
  12717. __ASM volatile("uradd64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  12718. return result;
  12719. }
  12720. /* ===== Inline Function End for 3.176. URADD64 ===== */
  12721. /* ===== Inline Function Start for 3.177. URADDW ===== */
  12722. /**
  12723. * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
  12724. * \brief URADDW (32-bit Unsigned Halving Addition)
  12725. * \details
  12726. * **Type**: DSP
  12727. *
  12728. * **Syntax**:\n
  12729. * ~~~
  12730. * URADDW Rd, Rs1, Rs2
  12731. * ~~~
  12732. *
  12733. * **Purpose**:\n
  12734. * Add 32-bit unsigned integers and the results are halved to avoid overflow or saturation.
  12735. *
  12736. * **Description**:\n
  12737. * This instruction adds the first 32-bit unsigned integer in Rs1 with the first 32-bit
  12738. * unsigned integer in Rs2. The result is first logically right-shifted by 1 bit and then sign-extended and
  12739. * written to Rd.
  12740. *
  12741. * **Examples**:\n
  12742. * ~~~
  12743. * * Ra = 0x7FFFFFFF, Rb = 0x7FFFFFFF Rt = 0x7FFFFFFF
  12744. * * Ra = 0x80000000, Rb = 0x80000000 Rt = 0x80000000
  12745. * * Ra = 0x40000000, Rb = 0x80000000 Rt = 0x60000000
  12746. * ~~~
  12747. *
  12748. * **Operations**:\n
  12749. * ~~~
  12750. * * RV32:
  12751. * Rd[31:0] = (Rs1[31:0] + Rs2[31:0]) u>> 1;
  12752. * * RV64:
  12753. * resw[31:0] = (Rs1[31:0] + Rs2[31:0]) u>> 1;
  12754. * Rd[63:0] = SE(resw[31:0]);
  12755. * ~~~
  12756. *
  12757. * \param [in] a unsigned int type of value stored in a
  12758. * \param [in] b unsigned int type of value stored in b
  12759. * \return value stored in unsigned long type
  12760. */
  12761. __STATIC_FORCEINLINE unsigned long __RV_URADDW(unsigned int a, unsigned int b)
  12762. {
  12763. unsigned long result;
  12764. __ASM volatile("uraddw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  12765. return result;
  12766. }
  12767. /* ===== Inline Function End for 3.177. URADDW ===== */
  12768. /* ===== Inline Function Start for 3.178. URCRAS16 ===== */
  12769. /**
  12770. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  12771. * \brief URCRAS16 (SIMD 16-bit Unsigned Halving Cross Addition & Subtraction)
  12772. * \details
  12773. * **Type**: SIMD
  12774. *
  12775. * **Syntax**:\n
  12776. * ~~~
  12777. * URCRAS16 Rd, Rs1, Rs2
  12778. * ~~~
  12779. *
  12780. * **Purpose**:\n
  12781. * Do 16-bit unsigned integer element addition and 16-bit unsigned integer element
  12782. * subtraction in a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit chunks.
  12783. * The results are halved to avoid overflow or saturation.
  12784. *
  12785. * **Description**:\n
  12786. * This instruction adds the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs1
  12787. * with the 16-bit unsigned integer in [15:0] of 32-bit chunks in Rs2, and subtracts the 16-bit unsigned
  12788. * integer in [31:16] of 32-bit chunks in Rs2 from the 16-bit unsigned integer in [15:0] of 32-bit chunks
  12789. * in Rs1. The element results are first logically right-shifted by 1 bit and then written to [31:16] of
  12790. * 32-bit chunks in Rd and [15:0] of 32-bit chunks in Rd.
  12791. *
  12792. * **Examples**:\n
  12793. * ~~~
  12794. * Please see `URADD16` and `URSUB16` instructions.
  12795. * ~~~
  12796. *
  12797. * **Operations**:\n
  12798. * ~~~
  12799. * Rd.W[x][31:16] = (Rs1.W[x][31:16] + Rs2.W[x][15:0]) u>> 1;
  12800. * Rd.W[x][15:0] = (Rs1.W[x][15:0] - Rs2.W[x][31:16]) u>> 1;
  12801. * for RV32, x=0
  12802. * for RV64, x=1...0
  12803. * ~~~
  12804. *
  12805. * \param [in] a unsigned long type of value stored in a
  12806. * \param [in] b unsigned long type of value stored in b
  12807. * \return value stored in unsigned long type
  12808. */
  12809. __STATIC_FORCEINLINE unsigned long __RV_URCRAS16(unsigned long a, unsigned long b)
  12810. {
  12811. unsigned long result;
  12812. __ASM volatile("urcras16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  12813. return result;
  12814. }
  12815. /* ===== Inline Function End for 3.178. URCRAS16 ===== */
  12816. /* ===== Inline Function Start for 3.179. URCRSA16 ===== */
  12817. /**
  12818. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  12819. * \brief URCRSA16 (SIMD 16-bit Unsigned Halving Cross Subtraction & Addition)
  12820. * \details
  12821. * **Type**: SIMD
  12822. *
  12823. * **Syntax**:\n
  12824. * ~~~
  12825. * URCRSA16 Rd, Rs1, Rs2
  12826. * ~~~
  12827. *
  12828. * **Purpose**:\n
  12829. * Do 16-bit unsigned integer element subtraction and 16-bit unsigned integer element
  12830. * addition in a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit chunks.
  12831. * The results are halved to avoid overflow or saturation.
  12832. *
  12833. * **Description**:\n
  12834. * This instruction subtracts the 16-bit unsigned integer in [15:0] of 32-bit chunks in Rs2
  12835. * from the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs1, and adds the 16-bit unsigned
  12836. * integer in [15:0] of 32-bit chunks in Rs1 with the 16-bit unsigned integer in [31:16] of 32-bit chunks
  12837. * in Rs2. The two results are first logically right-shifted by 1 bit and then written to [31:16] of 32-bit
  12838. * chunks in Rd and [15:0] of 32-bit chunks in Rd.
  12839. *
  12840. * **Examples**:\n
  12841. * ~~~
  12842. * Please see `URADD16` and `URSUB16` instructions.
  12843. * ~~~
  12844. *
  12845. * **Operations**:\n
  12846. * ~~~
  12847. * Rd.W[x][31:16] = (Rs1.W[x][31:16] - Rs2.W[x][15:0]) u>> 1;
  12848. * Rd.W[x][15:0] = (Rs1.W[x][15:0] + Rs2.W[x][31:16]) u>> 1;
  12849. * for RV32, x=0
  12850. * for RV64, x=1...0
  12851. * ~~~
  12852. *
  12853. * \param [in] a unsigned long type of value stored in a
  12854. * \param [in] b unsigned long type of value stored in b
  12855. * \return value stored in unsigned long type
  12856. */
  12857. __STATIC_FORCEINLINE unsigned long __RV_URCRSA16(unsigned long a, unsigned long b)
  12858. {
  12859. unsigned long result;
  12860. __ASM volatile("urcrsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  12861. return result;
  12862. }
  12863. /* ===== Inline Function End for 3.179. URCRSA16 ===== */
  12864. /* ===== Inline Function Start for 3.180. URSTAS16 ===== */
  12865. /**
  12866. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  12867. * \brief URSTAS16 (SIMD 16-bit Unsigned Halving Straight Addition & Subtraction)
  12868. * \details
  12869. * **Type**: SIMD
  12870. *
  12871. * **Syntax**:\n
  12872. * ~~~
  12873. * URSTAS16 Rd, Rs1, Rs2
  12874. * ~~~
  12875. *
  12876. * **Purpose**:\n
  12877. * Do 16-bit unsigned integer element addition and 16-bit unsigned integer element
  12878. * subtraction in a 32-bit chunk simultaneously. Operands are from corresponding positions in 32-bit
  12879. * chunks. The results are halved to avoid overflow or saturation.
  12880. *
  12881. * **Description**:\n
  12882. * This instruction adds the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs1
  12883. * with the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs2, and subtracts the 16-bit unsigned
  12884. * integer in [15:0] of 32-bit chunks in Rs2 from the 16-bit unsigned integer in [15:0] of 32-bit chunks
  12885. * in Rs1. The element results are first logically right-shifted by 1 bit and then written to [31:16] of
  12886. * 32-bit chunks in Rd and [15:0] of 32-bit chunks in Rd.
  12887. *
  12888. * **Examples**:\n
  12889. * ~~~
  12890. * Please see `URADD16` and `URSUB16` instructions.
  12891. * ~~~
  12892. *
  12893. * **Operations**:\n
  12894. * ~~~
  12895. * Rd.W[x][31:16] = (Rs1.W[x][31:16] + Rs2.W[x][31:16]) u>> 1;
  12896. * Rd.W[x][15:0] = (Rs1.W[x][15:0] - Rs2.W[x][15:0]) u>> 1;
  12897. * for RV32, x=0
  12898. * for RV64, x=1...0
  12899. * ~~~
  12900. *
  12901. * \param [in] a unsigned long type of value stored in a
  12902. * \param [in] b unsigned long type of value stored in b
  12903. * \return value stored in unsigned long type
  12904. */
  12905. __STATIC_FORCEINLINE unsigned long __RV_URSTAS16(unsigned long a, unsigned long b)
  12906. {
  12907. unsigned long result;
  12908. __ASM volatile("urstas16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  12909. return result;
  12910. }
  12911. /* ===== Inline Function End for 3.180. URSTAS16 ===== */
  12912. /* ===== Inline Function Start for 3.181. URSTSA16 ===== */
  12913. /**
  12914. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  12915. * \brief URSTSA16 (SIMD 16-bit Unsigned Halving Straight Subtraction & Addition)
  12916. * \details
  12917. * **Type**: SIMD
  12918. *
  12919. * **Syntax**:\n
  12920. * ~~~
  12921. * URSTSA16 Rd, Rs1, Rs2
  12922. * ~~~
  12923. *
  12924. * **Purpose**:\n
  12925. * Do 16-bit unsigned integer element subtraction and 16-bit unsigned integer element
  12926. * addition in a 32-bit chunk simultaneously. Operands are from corresponding positions in 32-bit
  12927. * chunks. The results are halved to avoid overflow or saturation.
  12928. *
  12929. * **Description**:\n
  12930. * This instruction subtracts the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs2
  12931. * from the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs1, and adds the 16-bit unsigned
  12932. * integer in [15:0] of 32-bit chunks in Rs1 with the 16-bit unsigned integer in [15:0] of 32-bit chunks in
  12933. * Rs2. The two results are first logically right-shifted by 1 bit and then written to [31:16] of 32-bit
  12934. * chunks in Rd and [15:0] of 32-bit chunks in Rd.
  12935. *
  12936. * **Examples**:\n
  12937. * ~~~
  12938. * Please see `URADD16` and `URSUB16` instructions.
  12939. * ~~~
  12940. *
  12941. * **Operations**:\n
  12942. * ~~~
  12943. * Rd.W[x][31:16] = (Rs1.W[x][31:16] - Rs2.W[x][31:16]) u>> 1;
  12944. * Rd.W[x][15:0] = (Rs1.W[x][15:0] + Rs2.W[x][15:0]) u>> 1;
  12945. * for RV32, x=0
  12946. * for RV64, x=1...0
  12947. * ~~~
  12948. *
  12949. * \param [in] a unsigned long type of value stored in a
  12950. * \param [in] b unsigned long type of value stored in b
  12951. * \return value stored in unsigned long type
  12952. */
  12953. __STATIC_FORCEINLINE unsigned long __RV_URSTSA16(unsigned long a, unsigned long b)
  12954. {
  12955. unsigned long result;
  12956. __ASM volatile("urstsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  12957. return result;
  12958. }
  12959. /* ===== Inline Function End for 3.181. URSTSA16 ===== */
  12960. /* ===== Inline Function Start for 3.182. URSUB8 ===== */
  12961. /**
  12962. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
  12963. * \brief URSUB8 (SIMD 8-bit Unsigned Halving Subtraction)
  12964. * \details
  12965. * **Type**: SIMD
  12966. *
  12967. * **Syntax**:\n
  12968. * ~~~
  12969. * URSUB8 Rd, Rs1, Rs2
  12970. * ~~~
  12971. *
  12972. * **Purpose**:\n
  12973. * Do 8-bit unsigned integer element subtractions simultaneously. The results are halved to
  12974. * avoid overflow or saturation.
  12975. *
  12976. * **Description**:\n
  12977. * This instruction subtracts the 8-bit unsigned integer elements in Rs2 from the 8-bit
  12978. * unsigned integer elements in Rs1. The results are first logically right-shifted by 1 bit and then
  12979. * written to Rd.
  12980. *
  12981. * **Examples**:\n
  12982. * ~~~
  12983. * * Ra = 0x7F, Rb = 0x80 Rt = 0xFF
  12984. * * Ra = 0x80, Rb = 0x7F Rt = 0x00
  12985. * * Ra = 0x80, Rb = 0x40 Rt = 0x20
  12986. * ~~~
  12987. *
  12988. * **Operations**:\n
  12989. * ~~~
  12990. * Rd.B[x] = (Rs1.B[x] - Rs2.B[x]) u>> 1;
  12991. * for RV32: x=3...0,
  12992. * for RV64: x=7...0
  12993. * ~~~
  12994. *
  12995. * \param [in] a unsigned long type of value stored in a
  12996. * \param [in] b unsigned long type of value stored in b
  12997. * \return value stored in unsigned long type
  12998. */
  12999. __STATIC_FORCEINLINE unsigned long __RV_URSUB8(unsigned long a, unsigned long b)
  13000. {
  13001. unsigned long result;
  13002. __ASM volatile("ursub8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  13003. return result;
  13004. }
  13005. /* ===== Inline Function End for 3.182. URSUB8 ===== */
  13006. /* ===== Inline Function Start for 3.183. URSUB16 ===== */
  13007. /**
  13008. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  13009. * \brief URSUB16 (SIMD 16-bit Unsigned Halving Subtraction)
  13010. * \details
  13011. * **Type**: SIMD
  13012. *
  13013. * **Syntax**:\n
  13014. * ~~~
  13015. * URSUB16 Rd, Rs1, Rs2
  13016. * ~~~
  13017. *
  13018. * **Purpose**:\n
  13019. * Do 16-bit unsigned integer element subtractions simultaneously. The results are halved to
  13020. * avoid overflow or saturation.
  13021. *
  13022. * **Description**:\n
  13023. * This instruction subtracts the 16-bit unsigned integer elements in Rs2 from the 16-bit
  13024. * unsigned integer elements in Rs1. The results are first logically right-shifted by 1 bit and then
  13025. * written to Rd.
  13026. *
  13027. * **Examples**:\n
  13028. * ~~~
  13029. * * Ra = 0x7FFF, Rb = 0x8000 Rt = 0xFFFF
  13030. * * Ra = 0x8000, Rb = 0x7FFF Rt = 0x0000
  13031. * * Ra = 0x8000, Rb = 0x4000 Rt = 0x2000
  13032. * ~~~
  13033. *
  13034. * **Operations**:\n
  13035. * ~~~
  13036. * Rd.H[x] = (Rs1.H[x] - Rs2.H[x]) u>> 1;
  13037. * for RV32: x=1...0,
  13038. * for RV64: x=3...0
  13039. * ~~~
  13040. *
  13041. * \param [in] a unsigned long type of value stored in a
  13042. * \param [in] b unsigned long type of value stored in b
  13043. * \return value stored in unsigned long type
  13044. */
  13045. __STATIC_FORCEINLINE unsigned long __RV_URSUB16(unsigned long a, unsigned long b)
  13046. {
  13047. unsigned long result;
  13048. __ASM volatile("ursub16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  13049. return result;
  13050. }
  13051. /* ===== Inline Function End for 3.183. URSUB16 ===== */
  13052. /* ===== Inline Function Start for 3.184. URSUB64 ===== */
  13053. /**
  13054. * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
  13055. * \brief URSUB64 (64-bit Unsigned Halving Subtraction)
  13056. * \details
  13057. * **Type**: DSP (64-bit Profile)
  13058. *
  13059. * **Syntax**:\n
  13060. * ~~~
  13061. * URSUB64 Rd, Rs1, Rs2
  13062. * ~~~
  13063. *
  13064. * **Purpose**:\n
  13065. * Perform a 64-bit unsigned integer subtraction. The result is halved to avoid overflow or
  13066. * saturation.
  13067. *
  13068. * **RV32 Description**:\n
  13069. * This instruction subtracts the 64-bit unsigned integer of an even/odd pair of
  13070. * registers specified by Rs2(4,1) from the 64-bit unsigned integer of an even/odd pair of registers
  13071. * specified by Rs1(4,1). The subtraction result is first logically right-shifted by 1 bit and then written
  13072. * to an even/odd pair of registers specified by Rd(4,1).
  13073. * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
  13074. * includes register 2d and 2d+1.
  13075. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
  13076. * of the pair contains the low 32-bit of the result.
  13077. *
  13078. * **RV64 Description**:\n
  13079. * This instruction subtracts the 64-bit unsigned integer in Rs2 from the 64-bit
  13080. * unsigned integer in Rs1. The subtraction result is first logically right-shifted by 1 bit and then
  13081. * written to Rd.
  13082. *
  13083. * **Operations**:\n
  13084. * ~~~
  13085. * * RV32:
  13086. * t_L = CONCAT(Rt(4,1),1'b0); t_H = CONCAT(Rt(4,1),1'b1);
  13087. * a_L = CONCAT(Ra(4,1),1'b0); a_H = CONCAT(Ra(4,1),1'b1);
  13088. * b_L = CONCAT(Rb(4,1),1'b0); b_H = CONCAT(Rb(4,1),1'b1);
  13089. * R[t_H].R[t_L] = (R[a_H].R[a_L] - R[b_H].R[b_L]) u>> 1;
  13090. * * RV64:
  13091. * Rd = (Rs1 - Rs2) u>> 1;
  13092. * ~~~
  13093. *
  13094. * \param [in] a unsigned long long type of value stored in a
  13095. * \param [in] b unsigned long long type of value stored in b
  13096. * \return value stored in unsigned long long type
  13097. */
  13098. __STATIC_FORCEINLINE unsigned long long __RV_URSUB64(unsigned long long a, unsigned long long b)
  13099. {
  13100. unsigned long long result;
  13101. __ASM volatile("ursub64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  13102. return result;
  13103. }
  13104. /* ===== Inline Function End for 3.184. URSUB64 ===== */
  13105. /* ===== Inline Function Start for 3.185. URSUBW ===== */
  13106. /**
  13107. * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
  13108. * \brief URSUBW (32-bit Unsigned Halving Subtraction)
  13109. * \details
  13110. * **Type**: DSP
  13111. *
  13112. * **Syntax**:\n
  13113. * ~~~
  13114. * URSUBW Rd, Rs1, Rs2
  13115. * ~~~
  13116. *
  13117. * **Purpose**:\n
  13118. * Subtract 32-bit unsigned integers and the result is halved to avoid overflow or saturation.
  13119. *
  13120. * **Description**:\n
 * This instruction subtracts the first 32-bit unsigned integer in Rs2 from the first 32-bit
 * unsigned integer in Rs1. The result is first logically right-shifted by 1 bit and then sign-extended and
 * written to Rd.
  13124. *
  13125. * **Examples**:\n
  13126. * ~~~
  13127. * * Ra = 0x7FFFFFFF, Rb = 0x80000000 Rt = 0xFFFFFFFF
  13128. * * Ra = 0x80000000, Rb = 0x7FFFFFFF Rt = 0x00000000
  13129. * * Ra = 0x80000000, Rb = 0x40000000 Rt = 0x20000000
  13130. * ~~~
  13131. *
  13132. * **Operations**:\n
  13133. * ~~~
  13134. * * RV32:
  13135. * Rd[31:0] = (Rs1[31:0] - Rs2[31:0]) u>> 1;
  13136. * * RV64:
  13137. * resw[31:0] = (Rs1[31:0] - Rs2[31:0]) u>> 1;
  13138. * Rd[63:0] = SE(resw[31:0]);
  13139. * ~~~
  13140. *
  13141. * \param [in] a unsigned int type of value stored in a
  13142. * \param [in] b unsigned int type of value stored in b
  13143. * \return value stored in unsigned long type
  13144. */
  13145. __STATIC_FORCEINLINE unsigned long __RV_URSUBW(unsigned int a, unsigned int b)
  13146. {
  13147. unsigned long result;
  13148. __ASM volatile("ursubw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  13149. return result;
  13150. }
  13151. /* ===== Inline Function End for 3.185. URSUBW ===== */
  13152. /* ===== Inline Function Start for 3.186. WEXTI ===== */
  13153. /**
  13154. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
  13155. * \brief WEXTI (Extract Word from 64-bit Immediate)
  13156. * \details
  13157. * **Type**: DSP
  13158. *
  13159. * **Syntax**:\n
  13160. * ~~~
  13161. * WEXTI Rd, Rs1, #LSBloc
  13162. * ~~~
  13163. *
  13164. * **Purpose**:\n
  13165. * Extract a 32-bit word from a 64-bit value stored in an even/odd pair of registers (RV32) or
  13166. * a register (RV64) starting from a specified immediate LSB bit position.
  13167. *
  13168. * **RV32 Description**:\n
  13169. * This instruction extracts a 32-bit word from a 64-bit value of an even/odd pair of registers specified
  13170. * by Rs1(4,1) starting from a specified immediate LSB bit position, #LSBloc. The extracted word is
  13171. * written to Rd.
  13172. * Rs1(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register
  13173. * pair includes register 2d and 2d+1.
  13174. * The odd `2d+1` register of the pair contains the high 32-bit of the 64-bit value and the even `2d`
  13175. * register of the pair contains the low 32-bit of the 64-bit value.
  13176. *
  13177. * **RV64 Description**:\n
  13178. * This instruction extracts a 32-bit word from a 64-bit value in Rs1 starting from a specified
  13179. * immediate LSB bit position, #LSBloc. The extracted word is sign-extended and written to lower 32-
  13180. * bit of Rd.
  13181. *
  13182. * **Operations**:\n
  13183. * ~~~
  13184. * * RV32:
  13185. * Idx0 = CONCAT(Rs1(4,1),1'b0); Idx1 = CONCAT(Rs2(4,1),1'b1);
  13186. * src[63:0] = Concat(R[Idx1], R[Idx0]);
  13187. * Rd = src[31+LSBloc:LSBloc];
  13188. * * RV64:
  13189. * ExtractW = Rs1[31+LSBloc:LSBloc];
  13190. * Rd = SE(ExtractW)
  13191. * ~~~
  13192. *
  13193. * \param [in] a long long type of value stored in a
  13194. * \param [in] b unsigned int type of value stored in b
  13195. * \return value stored in unsigned long type
  13196. */
  13197. #define __RV_WEXTI(a, b) \
  13198. ({ \
  13199. unsigned long result; \
  13200. long long __a = (long long)(a); \
  13201. __ASM volatile("wexti %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \
  13202. result; \
  13203. })
  13204. /* ===== Inline Function End for 3.186. WEXTI ===== */
  13205. /* ===== Inline Function Start for 3.187. WEXT ===== */
  13206. /**
  13207. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
  13208. * \brief WEXT (Extract Word from 64-bit)
  13209. * \details
  13210. * **Type**: DSP
  13211. *
  13212. * **Syntax**:\n
  13213. * ~~~
  13214. * WEXT Rd, Rs1, Rs2
  13215. * ~~~
  13216. *
  13217. * **Purpose**:\n
  13218. * Extract a 32-bit word from a 64-bit value stored in an even/odd pair of registers (RV32) or
  13219. * a register (RV64) starting from a specified LSB bit position in a register.
  13220. *
  13221. * **RV32 Description**:\n
  13222. * This instruction extracts a 32-bit word from a 64-bit value of an even/odd pair of registers specified
  13223. * by Rs1(4,1) starting from a specified LSB bit position, specified in Rs2[4:0]. The extracted word is
  13224. * written to Rd.
  13225. * Rs1(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register
  13226. * pair includes register 2d and 2d+1.
  13227. * The odd `2d+1` register of the pair contains the high 32-bit of the 64-bit value and the even `2d`
  13228. * register of the pair contains the low 32-bit of the 64-bit value.
  13229. *
  13230. * **Operations**:\n
  13231. * ~~~
  13232. * * RV32:
  13233. * Idx0 = CONCAT(Rs1(4,1),1'b0); Idx1 = CONCAT(Rs1(4,1),1'b1);
  13234. * src[63:0] = Concat(R[Idx1], R[Idx0]);
  13235. * LSBloc = Rs2[4:0];
  13236. * Rd = src[31+LSBloc:LSBloc];
  13237. * * RV64:
  13238. * LSBloc = Rs2[4:0];
  13239. * ExtractW = Rs1[31+LSBloc:LSBloc];
  13240. * Rd = SE(ExtractW)
  13241. * ~~~
  13242. *
  13243. * \param [in] a long long type of value stored in a
  13244. * \param [in] b unsigned int type of value stored in b
  13245. * \return value stored in unsigned long type
  13246. */
  13247. __STATIC_FORCEINLINE unsigned long __RV_WEXT(long long a, unsigned int b)
  13248. {
  13249. unsigned long result;
  13250. __ASM volatile("wext %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  13251. return result;
  13252. }
  13253. /* ===== Inline Function End for 3.187. WEXT ===== */
  13254. /* ===== Inline Function Start for 3.188.1. ZUNPKD810 ===== */
  13255. /**
  13256. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
  13257. * \brief ZUNPKD810 (Unsigned Unpacking Bytes 1 & 0)
  13258. * \details
  13259. * **Type**: DSP
  13260. *
  13261. * **Syntax**:\n
  13262. * ~~~
  13263. * ZUNPKD8xy Rd, Rs1
  13264. * xy = {10, 20, 30, 31, 32}
  13265. * ~~~
  13266. *
  13267. * **Purpose**:\n
  13268. * Unpack byte x and byte y of 32-bit chunks in a register into two 16-bit unsigned
  13269. * halfwords of 32-bit chunks in a register.
  13270. *
  13271. * **Description**:\n
 * For the `ZUNPKD8xy` instruction, it unpacks byte *x* and byte *y* of 32-bit chunks in Rs1 into
  13273. * two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit
  13274. * chunks in Rd.
  13275. *
  13276. * **Operations**:\n
  13277. * ~~~
  13278. * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[x])
  13279. * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[y])
  13280. * // ZUNPKD810, x=1,y=0
  13281. * // ZUNPKD820, x=2,y=0
  13282. * // ZUNPKD830, x=3,y=0
  13283. * // ZUNPKD831, x=3,y=1
  13284. * // ZUNPKD832, x=3,y=2
  13285. * for RV32: m=0,
  13286. * for RV64: m=1...0
  13287. * ~~~
  13288. *
  13289. * \param [in] a unsigned long type of value stored in a
  13290. * \return value stored in unsigned long type
  13291. */
  13292. __STATIC_FORCEINLINE unsigned long __RV_ZUNPKD810(unsigned long a)
  13293. {
  13294. unsigned long result;
  13295. __ASM volatile("zunpkd810 %0, %1" : "=r"(result) : "r"(a));
  13296. return result;
  13297. }
  13298. /* ===== Inline Function End for 3.188.1. ZUNPKD810 ===== */
  13299. /* ===== Inline Function Start for 3.188.2. ZUNPKD820 ===== */
  13300. /**
  13301. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
  13302. * \brief ZUNPKD820 (Unsigned Unpacking Bytes 2 & 0)
  13303. * \details
  13304. * **Type**: DSP
  13305. *
  13306. * **Syntax**:\n
  13307. * ~~~
  13308. * ZUNPKD8xy Rd, Rs1
  13309. * xy = {10, 20, 30, 31, 32}
  13310. * ~~~
  13311. *
  13312. * **Purpose**:\n
  13313. * Unpack byte x and byte y of 32-bit chunks in a register into two 16-bit unsigned
  13314. * halfwords of 32-bit chunks in a register.
  13315. *
  13316. * **Description**:\n
 * For the `ZUNPKD8xy` instruction, it unpacks byte *x* and byte *y* of 32-bit chunks in Rs1 into
  13318. * two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit
  13319. * chunks in Rd.
  13320. *
  13321. * **Operations**:\n
  13322. * ~~~
  13323. * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[x])
  13324. * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[y])
  13325. * // ZUNPKD810, x=1,y=0
  13326. * // ZUNPKD820, x=2,y=0
  13327. * // ZUNPKD830, x=3,y=0
  13328. * // ZUNPKD831, x=3,y=1
  13329. * // ZUNPKD832, x=3,y=2
  13330. * for RV32: m=0,
  13331. * for RV64: m=1...0
  13332. * ~~~
  13333. *
  13334. * \param [in] a unsigned long type of value stored in a
  13335. * \return value stored in unsigned long type
  13336. */
  13337. __STATIC_FORCEINLINE unsigned long __RV_ZUNPKD820(unsigned long a)
  13338. {
  13339. unsigned long result;
  13340. __ASM volatile("zunpkd820 %0, %1" : "=r"(result) : "r"(a));
  13341. return result;
  13342. }
  13343. /* ===== Inline Function End for 3.188.2. ZUNPKD820 ===== */
  13344. /* ===== Inline Function Start for 3.188.3. ZUNPKD830 ===== */
  13345. /**
  13346. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
  13347. * \brief ZUNPKD830 (Unsigned Unpacking Bytes 3 & 0)
  13348. * \details
  13349. * **Type**: DSP
  13350. *
  13351. * **Syntax**:\n
  13352. * ~~~
  13353. * ZUNPKD8xy Rd, Rs1
  13354. * xy = {10, 20, 30, 31, 32}
  13355. * ~~~
  13356. *
  13357. * **Purpose**:\n
  13358. * Unpack byte x and byte y of 32-bit chunks in a register into two 16-bit unsigned
  13359. * halfwords of 32-bit chunks in a register.
  13360. *
  13361. * **Description**:\n
 * For the `ZUNPKD8xy` instruction, it unpacks byte *x* and byte *y* of 32-bit chunks in Rs1 into
  13363. * two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit
  13364. * chunks in Rd.
  13365. *
  13366. * **Operations**:\n
  13367. * ~~~
  13368. * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[x])
  13369. * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[y])
  13370. * // ZUNPKD810, x=1,y=0
  13371. * // ZUNPKD820, x=2,y=0
  13372. * // ZUNPKD830, x=3,y=0
  13373. * // ZUNPKD831, x=3,y=1
  13374. * // ZUNPKD832, x=3,y=2
  13375. * for RV32: m=0,
  13376. * for RV64: m=1...0
  13377. * ~~~
  13378. *
  13379. * \param [in] a unsigned long type of value stored in a
  13380. * \return value stored in unsigned long type
  13381. */
  13382. __STATIC_FORCEINLINE unsigned long __RV_ZUNPKD830(unsigned long a)
  13383. {
  13384. unsigned long result;
  13385. __ASM volatile("zunpkd830 %0, %1" : "=r"(result) : "r"(a));
  13386. return result;
  13387. }
  13388. /* ===== Inline Function End for 3.188.3. ZUNPKD830 ===== */
  13389. /* ===== Inline Function Start for 3.188.4. ZUNPKD831 ===== */
  13390. /**
  13391. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
  13392. * \brief ZUNPKD831 (Unsigned Unpacking Bytes 3 & 1)
  13393. * \details
  13394. * **Type**: DSP
  13395. *
  13396. * **Syntax**:\n
  13397. * ~~~
  13398. * ZUNPKD8xy Rd, Rs1
  13399. * xy = {10, 20, 30, 31, 32}
  13400. * ~~~
  13401. *
  13402. * **Purpose**:\n
  13403. * Unpack byte x and byte y of 32-bit chunks in a register into two 16-bit unsigned
  13404. * halfwords of 32-bit chunks in a register.
  13405. *
  13406. * **Description**:\n
 * For the `ZUNPKD8xy` instruction, it unpacks byte *x* and byte *y* of 32-bit chunks in Rs1 into
  13408. * two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit
  13409. * chunks in Rd.
  13410. *
  13411. * **Operations**:\n
  13412. * ~~~
  13413. * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[x])
  13414. * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[y])
  13415. * // ZUNPKD810, x=1,y=0
  13416. * // ZUNPKD820, x=2,y=0
  13417. * // ZUNPKD830, x=3,y=0
  13418. * // ZUNPKD831, x=3,y=1
  13419. * // ZUNPKD832, x=3,y=2
  13420. * for RV32: m=0,
  13421. * for RV64: m=1...0
  13422. * ~~~
  13423. *
  13424. * \param [in] a unsigned long type of value stored in a
  13425. * \return value stored in unsigned long type
  13426. */
  13427. __STATIC_FORCEINLINE unsigned long __RV_ZUNPKD831(unsigned long a)
  13428. {
  13429. unsigned long result;
  13430. __ASM volatile("zunpkd831 %0, %1" : "=r"(result) : "r"(a));
  13431. return result;
  13432. }
  13433. /* ===== Inline Function End for 3.188.4. ZUNPKD831 ===== */
  13434. /* ===== Inline Function Start for 3.188.5. ZUNPKD832 ===== */
  13435. /**
  13436. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
  13437. * \brief ZUNPKD832 (Unsigned Unpacking Bytes 3 & 2)
  13438. * \details
  13439. * **Type**: DSP
  13440. *
  13441. * **Syntax**:\n
  13442. * ~~~
  13443. * ZUNPKD8xy Rd, Rs1
  13444. * xy = {10, 20, 30, 31, 32}
  13445. * ~~~
  13446. *
  13447. * **Purpose**:\n
  13448. * Unpack byte x and byte y of 32-bit chunks in a register into two 16-bit unsigned
  13449. * halfwords of 32-bit chunks in a register.
  13450. *
  13451. * **Description**:\n
 * For the `ZUNPKD8xy` instruction, it unpacks byte *x* and byte *y* of 32-bit chunks in Rs1 into
  13453. * two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit
  13454. * chunks in Rd.
  13455. *
  13456. * **Operations**:\n
  13457. * ~~~
  13458. * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[x])
  13459. * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[y])
  13460. * // ZUNPKD810, x=1,y=0
  13461. * // ZUNPKD820, x=2,y=0
  13462. * // ZUNPKD830, x=3,y=0
  13463. * // ZUNPKD831, x=3,y=1
  13464. * // ZUNPKD832, x=3,y=2
  13465. * for RV32: m=0,
  13466. * for RV64: m=1...0
  13467. * ~~~
  13468. *
  13469. * \param [in] a unsigned long type of value stored in a
  13470. * \return value stored in unsigned long type
  13471. */
  13472. __STATIC_FORCEINLINE unsigned long __RV_ZUNPKD832(unsigned long a)
  13473. {
  13474. unsigned long result;
  13475. __ASM volatile("zunpkd832 %0, %1" : "=r"(result) : "r"(a));
  13476. return result;
  13477. }
  13478. /* ===== Inline Function End for 3.188.5. ZUNPKD832 ===== */
  13479. #if (__RISCV_XLEN == 64) || defined(__ONLY_FOR_DOXYGEN_DOCUMENT_GENERATION__)
  13480. /* ===== Inline Function Start for 4.1. ADD32 ===== */
  13481. /**
  13482. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  13483. * \brief ADD32 (SIMD 32-bit Addition)
  13484. * \details
  13485. * **Type**: SIMD (RV64 Only)
  13486. *
  13487. * **Syntax**:\n
  13488. * ~~~
  13489. * ADD32 Rd, Rs1, Rs2
  13490. * ~~~
  13491. *
  13492. * **Purpose**:\n
  13493. * Do 32-bit integer element additions simultaneously.
  13494. *
  13495. * **Description**:\n
  13496. * This instruction adds the 32-bit integer elements in Rs1 with the 32-bit integer
  13497. * elements in Rs2, and then writes the 32-bit element results to Rd.
  13498. *
  13499. * **Note**:\n
  13500. * This instruction can be used for either signed or unsigned addition.
  13501. *
  13502. * **Operations**:\n
  13503. * ~~~
  13504. * Rd.W[x] = Rs1.W[x] + Rs2.W[x];
  13505. * for RV64: x=1...0
  13506. * ~~~
  13507. *
  13508. * \param [in] a unsigned long type of value stored in a
  13509. * \param [in] b unsigned long type of value stored in b
  13510. * \return value stored in unsigned long type
  13511. */
  13512. __STATIC_FORCEINLINE unsigned long __RV_ADD32(unsigned long a, unsigned long b)
  13513. {
  13514. unsigned long result;
  13515. __ASM volatile("add32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  13516. return result;
  13517. }
  13518. /* ===== Inline Function End for 4.1. ADD32 ===== */
  13519. /* ===== Inline Function Start for 4.2. CRAS32 ===== */
  13520. /**
  13521. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  13522. * \brief CRAS32 (SIMD 32-bit Cross Addition & Subtraction)
  13523. * \details
  13524. * **Type**: SIMD (RV64 Only)
  13525. *
  13526. * **Syntax**:\n
  13527. * ~~~
  13528. * CRAS32 Rd, Rs1, Rs2
  13529. * ~~~
  13530. *
  13531. * **Purpose**:\n
  13532. * Do 32-bit integer element addition and 32-bit integer element subtraction in a 64-bit
  13533. * chunk simultaneously. Operands are from crossed 32-bit elements.
  13534. *
  13535. * **Description**:\n
  13536. * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit
  13537. * integer element in [31:0] of Rs2, and writes the result to [63:32] of Rd; at the same time, it subtracts
  13538. * the 32-bit integer element in [63:32] of Rs2 from the 32-bit integer element in [31:0] of Rs1, and
  13539. * writes the result to [31:0] of Rd.
  13540. *
  13541. * **Note**:\n
  13542. * This instruction can be used for either signed or unsigned operations.
  13543. *
  13544. * **Operations**:\n
  13545. * ~~~
  13546. * Rd.W[1] = Rs1.W[1] + Rs2.W[0];
  13547. * Rd.W[0] = Rs1.W[0] - Rs2.W[1];
  13548. * ~~~
  13549. *
  13550. * \param [in] a unsigned long type of value stored in a
  13551. * \param [in] b unsigned long type of value stored in b
  13552. * \return value stored in unsigned long type
  13553. */
  13554. __STATIC_FORCEINLINE unsigned long __RV_CRAS32(unsigned long a, unsigned long b)
  13555. {
  13556. unsigned long result;
  13557. __ASM volatile("cras32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  13558. return result;
  13559. }
  13560. /* ===== Inline Function End for 4.2. CRAS32 ===== */
  13561. /* ===== Inline Function Start for 4.3. CRSA32 ===== */
  13562. /**
  13563. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  13564. * \brief CRSA32 (SIMD 32-bit Cross Subtraction & Addition)
  13565. * \details
  13566. * **Type**: SIMD (RV64 Only)
  13567. *
  13568. * **Syntax**:\n
  13569. * ~~~
  13570. * CRSA32 Rd, Rs1, Rs2
  13571. * ~~~
  13572. *
  13573. * **Purpose**:\n
  13574. * Do 32-bit integer element subtraction and 32-bit integer element addition in a 64-bit
  13575. * chunk simultaneously. Operands are from crossed 32-bit elements.
 *
 * **Description**:\n
  13577. * This instruction subtracts the 32-bit integer element in [31:0] of Rs2 from the 32-bit integer element
  13578. * in [63:32] of Rs1, and writes the result to [63:32] of Rd; at the same time, it adds the 32-bit integer
  13579. * element in [31:0] of Rs1 with the 32-bit integer element in [63:32] of Rs2, and writes the result to
  13580. * [31:0] of Rd
  13581. *
  13582. * **Note**:\n
  13583. * This instruction can be used for either signed or unsigned operations.
  13584. *
  13585. * **Operations**:\n
  13586. * ~~~
  13587. * Rd.W[1] = Rs1.W[1] - Rs2.W[0];
  13588. * Rd.W[0] = Rs1.W[0] + Rs2.W[1];
  13589. * ~~~
  13590. *
  13591. * \param [in] a unsigned long type of value stored in a
  13592. * \param [in] b unsigned long type of value stored in b
  13593. * \return value stored in unsigned long type
  13594. */
  13595. __STATIC_FORCEINLINE unsigned long __RV_CRSA32(unsigned long a, unsigned long b)
  13596. {
  13597. unsigned long result;
  13598. __ASM volatile("crsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  13599. return result;
  13600. }
  13601. /* ===== Inline Function End for 4.3. CRSA32 ===== */
  13602. /* ===== Inline Function Start for 4.4. KABS32 ===== */
  13603. /**
  13604. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC
  13605. * \brief KABS32 (Scalar 32-bit Absolute Value with Saturation)
  13606. * \details
  13607. * **Type**: DSP (RV64 Only)
 *
 * **Encoding** (instruction fields, bit ranges inclusive):\n
 * ~~~
 * [24:20] KABS32 = 10010
 * [19:15] Rs1
 * [14:12] 000
 * [11:7]  Rd
 * [6:0]   GE80B  = 1111111
 * ~~~
  13620. *
  13621. * **Syntax**:\n
  13622. * ~~~
  13623. * KABS32 Rd, Rs1
  13624. * ~~~
  13625. *
  13626. * **Purpose**:\n
  13627. * Get the absolute value of signed 32-bit integer elements in a general register.
  13628. *
  13629. * **Description**:\n
  13630. * This instruction calculates the absolute value of signed 32-bit integer elements stored
  13631. * in Rs1. The results are written to Rd. This instruction with the minimum negative integer input of
  13632. * 0x80000000 will produce a saturated output of maximum positive integer of 0x7fffffff and the OV
  13633. * flag will be set to 1.
  13634. *
  13635. * **Operations**:\n
  13636. * ~~~
  13637. * if (Rs1.W[x] >= 0) {
  13638. * res[x] = Rs1.W[x];
  13639. * } else {
  13640. * If (Rs1.W[x] == 0x80000000) {
  13641. * res[x] = 0x7fffffff;
  13642. * OV = 1;
  13643. * } else {
  13644. * res[x] = -Rs1.W[x];
  13645. * }
  13646. * }
  13647. * Rd.W[x] = res[x];
  13648. * for RV64: x=1...0
  13649. * ~~~
  13650. *
  13651. * \param [in] a unsigned long type of value stored in a
  13652. * \return value stored in unsigned long type
  13653. */
  13654. __STATIC_FORCEINLINE unsigned long __RV_KABS32(unsigned long a)
  13655. {
  13656. unsigned long result;
  13657. __ASM volatile("kabs32 %0, %1" : "=r"(result) : "r"(a));
  13658. return result;
  13659. }
  13660. /* ===== Inline Function End for 4.4. KABS32 ===== */
  13661. /* ===== Inline Function Start for 4.5. KADD32 ===== */
  13662. /**
  13663. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  13664. * \brief KADD32 (SIMD 32-bit Signed Saturating Addition)
  13665. * \details
  13666. * **Type**: SIMD (RV64 Only)
  13667. *
  13668. * **Syntax**:\n
  13669. * ~~~
  13670. * KADD32 Rd, Rs1, Rs2
  13671. * ~~~
  13672. *
  13673. * **Purpose**:\n
  13674. * Do 32-bit signed integer element saturating additions simultaneously.
  13675. *
  13676. * **Description**:\n
  13677. * This instruction adds the 32-bit signed integer elements in Rs1 with the 32-bit signed
  13678. * integer elements in Rs2. If any of the results are beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1),
  13679. * they are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
  13680. *
  13681. * **Operations**:\n
  13682. * ~~~
  13683. * res[x] = Rs1.W[x] + Rs2.W[x];
  13684. * if (res[x] > (2^31)-1) {
  13685. * res[x] = (2^31)-1;
  13686. * OV = 1;
  13687. * } else if (res[x] < -2^31) {
  13688. * res[x] = -2^31;
  13689. * OV = 1;
  13690. * }
  13691. * Rd.W[x] = res[x];
  13692. * for RV64: x=1...0
  13693. * ~~~
  13694. *
  13695. * \param [in] a unsigned long type of value stored in a
  13696. * \param [in] b unsigned long type of value stored in b
  13697. * \return value stored in unsigned long type
  13698. */
  13699. __STATIC_FORCEINLINE unsigned long __RV_KADD32(unsigned long a, unsigned long b)
  13700. {
  13701. unsigned long result;
  13702. __ASM volatile("kadd32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  13703. return result;
  13704. }
  13705. /* ===== Inline Function End for 4.5. KADD32 ===== */
  13706. /* ===== Inline Function Start for 4.6. KCRAS32 ===== */
  13707. /**
  13708. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  13709. * \brief KCRAS32 (SIMD 32-bit Signed Saturating Cross Addition & Subtraction)
  13710. * \details
 * **Type**: SIMD (RV64 Only)
  13712. *
  13713. * **Syntax**:\n
  13714. * ~~~
  13715. * KCRAS32 Rd, Rs1, Rs2
  13716. * ~~~
  13717. *
  13718. * **Purpose**:\n
  13719. * Do 32-bit signed integer element saturating addition and 32-bit signed integer element
  13720. * saturating subtraction in a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements.
  13721. *
  13722. * **Description**:\n
  13723. * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit
  13724. * integer element in [31:0] of Rs2; at the same time, it subtracts the 32-bit integer element in [63:32] of
  13725. * Rs2 from the 32-bit integer element in [31:0] of Rs1. If any of the results are beyond the Q31 number
  13726. * range (-2^31 <= Q31 <= 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated
  13727. * results are written to [63:32] of Rd for addition and [31:0] of Rd for subtraction.
  13728. *
  13729. * **Operations**:\n
  13730. * ~~~
  13731. * res[1] = Rs1.W[1] + Rs2.W[0];
  13732. * res[0] = Rs1.W[0] - Rs2.W[1];
  13733. * if (res[x] > (2^31)-1) {
  13734. * res[x] = (2^31)-1;
  13735. * OV = 1;
 * } else if (res[x] < -2^31) {
  13737. * res[x] = -2^31;
  13738. * OV = 1;
  13739. * }
  13740. * Rd.W[1] = res[1];
  13741. * Rd.W[0] = res[0];
  13742. * for RV64, x=1...0
  13743. * ~~~
  13744. *
  13745. * \param [in] a unsigned long type of value stored in a
  13746. * \param [in] b unsigned long type of value stored in b
  13747. * \return value stored in unsigned long type
  13748. */
  13749. __STATIC_FORCEINLINE unsigned long __RV_KCRAS32(unsigned long a, unsigned long b)
  13750. {
  13751. unsigned long result;
  13752. __ASM volatile("kcras32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  13753. return result;
  13754. }
  13755. /* ===== Inline Function End for 4.6. KCRAS32 ===== */
  13756. /* ===== Inline Function Start for 4.7. KCRSA32 ===== */
  13757. /**
  13758. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  13759. * \brief KCRSA32 (SIMD 32-bit Signed Saturating Cross Subtraction & Addition)
  13760. * \details
  13761. * **Type**: SIMD (RV64 Only)
  13762. *
  13763. * **Syntax**:\n
  13764. * ~~~
  13765. * KCRSA32 Rd, Rs1, Rs2
  13766. * ~~~
  13767. *
  13768. * **Purpose**:\n
  13769. * Do 32-bit signed integer element saturating subtraction and 32-bit signed integer element
  13770. * saturating addition in a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements.
 *
 * **Description**:\n
  13772. * This instruction subtracts the 32-bit integer element in [31:0] of Rs2 from the 32-bit integer element
  13773. * in [63:32] of Rs1; at the same time, it adds the 32-bit integer element in [31:0] of Rs1 with the 32-bit
  13774. * integer element in [63:32] of Rs2. If any of the results are beyond the Q31 number range (-2^31 <= Q31
  13775. * <= 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to
  13776. * [63:32] of Rd for subtraction and [31:0] of Rd for addition.
  13777. *
  13778. * **Operations**:\n
  13779. * ~~~
  13780. * res[1] = Rs1.W[1] - Rs2.W[0];
  13781. * res[0] = Rs1.W[0] + Rs2.W[1];
  13782. * if (res[x] > (2^31)-1) {
  13783. * res[x] = (2^31)-1;
  13784. * OV = 1;
  13785. * } else if (res[x] < -2^31) {
  13786. * res[x] = -2^31;
  13787. * OV = 1;
  13788. * }
  13789. * Rd.W[1] = res[1];
  13790. * Rd.W[0] = res[0];
  13791. * for RV64, x=1...0
  13792. * ~~~
  13793. *
  13794. * \param [in] a unsigned long type of value stored in a
  13795. * \param [in] b unsigned long type of value stored in b
  13796. * \return value stored in unsigned long type
  13797. */
  13798. __STATIC_FORCEINLINE unsigned long __RV_KCRSA32(unsigned long a, unsigned long b)
  13799. {
  13800. unsigned long result;
  13801. __ASM volatile("kcrsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  13802. return result;
  13803. }
  13804. /* ===== Inline Function End for 4.7. KCRSA32 ===== */
  13805. /* ===== Inline Function Start for 4.8.1. KDMBB16 ===== */
  13806. /**
  13807. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
  13808. * \brief KDMBB16 (SIMD Signed Saturating Double Multiply B16 x B16)
  13809. * \details
  13810. * **Type**: SIMD (RV64 only)
  13811. *
  13812. * **Syntax**:\n
  13813. * ~~~
  13814. * KDMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
  13815. * ~~~
  13816. *
  13817. * **Purpose**:\n
  13818. * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
  13819. * of the 32-bit chunks in registers and then double and saturate the Q31 results into the 32-bit chunks
  13820. * in the destination register. If saturation happens, an overflow flag OV will be set.
  13821. *
  13822. * **Description**:\n
  13823. * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
  13824. * or bottom 16-bit Q15 content of the 32-bit portions in Rs2. The Q30 results are then doubled and
  13825. * saturated into Q31 values. The Q31 values are then written into the 32-bit chunks in Rd. When both
  13826. * the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated to 0x7FFFFFFF
  13827. * and the overflow flag OV will be set.
  13828. *
  13829. * **Operations**:\n
  13830. * ~~~
  13831. * // KDMBB16: (x,y,z)=(0,0,0),(2,2,1)
  13832. * // KDMBT16: (x,y,z)=(0,1,0),(2,3,1)
  13833. * // KDMTT16: (x,y,z)=(1,1,0),(3,3,1)
  13834. * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y];
  13835. * If (0x8000 != aop[z] | 0x8000 != bop[z]) {
  13836. * Mresult[z] = aop[z] * bop[z];
  13837. * resQ31[z] = Mresult[z] << 1;
  13838. * } else {
  13839. * resQ31[z] = 0x7FFFFFFF;
  13840. * OV = 1;
  13841. * }
  13842. * Rd.W[z] = resQ31[z];
  13843. * ~~~
  13844. *
  13845. * \param [in] a unsigned long type of value stored in a
  13846. * \param [in] b unsigned long type of value stored in b
  13847. * \return value stored in unsigned long type
  13848. */
  13849. __STATIC_FORCEINLINE unsigned long __RV_KDMBB16(unsigned long a, unsigned long b)
  13850. {
  13851. unsigned long result;
  13852. __ASM volatile("kdmbb16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  13853. return result;
  13854. }
  13855. /* ===== Inline Function End for 4.8.1. KDMBB16 ===== */
  13856. /* ===== Inline Function Start for 4.8.2. KDMBT16 ===== */
  13857. /**
  13858. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
  13859. * \brief KDMBT16 (SIMD Signed Saturating Double Multiply B16 x T16)
  13860. * \details
  13861. * **Type**: SIMD (RV64 only)
  13862. *
  13863. * **Syntax**:\n
  13864. * ~~~
  13865. * KDMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
  13866. * ~~~
  13867. *
  13868. * **Purpose**:\n
  13869. * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
  13870. * of the 32-bit chunks in registers and then double and saturate the Q31 results into the 32-bit chunks
  13871. * in the destination register. If saturation happens, an overflow flag OV will be set.
  13872. *
  13873. * **Description**:\n
  13874. * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
  13875. * or bottom 16-bit Q15 content of the 32-bit portions in Rs2. The Q30 results are then doubled and
  13876. * saturated into Q31 values. The Q31 values are then written into the 32-bit chunks in Rd. When both
  13877. * the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated to 0x7FFFFFFF
  13878. * and the overflow flag OV will be set.
  13879. *
  13880. * **Operations**:\n
  13881. * ~~~
  13882. * // KDMBB16: (x,y,z)=(0,0,0),(2,2,1)
  13883. * // KDMBT16: (x,y,z)=(0,1,0),(2,3,1)
  13884. * // KDMTT16: (x,y,z)=(1,1,0),(3,3,1)
  13885. * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y];
  13886. * If (0x8000 != aop[z] | 0x8000 != bop[z]) {
  13887. * Mresult[z] = aop[z] * bop[z];
  13888. * resQ31[z] = Mresult[z] << 1;
  13889. * } else {
  13890. * resQ31[z] = 0x7FFFFFFF;
  13891. * OV = 1;
  13892. * }
  13893. * Rd.W[z] = resQ31[z];
  13894. * ~~~
  13895. *
  13896. * \param [in] a unsigned long type of value stored in a
  13897. * \param [in] b unsigned long type of value stored in b
  13898. * \return value stored in unsigned long type
  13899. */
  13900. __STATIC_FORCEINLINE unsigned long __RV_KDMBT16(unsigned long a, unsigned long b)
  13901. {
  13902. unsigned long result;
  13903. __ASM volatile("kdmbt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  13904. return result;
  13905. }
  13906. /* ===== Inline Function End for 4.8.2. KDMBT16 ===== */
  13907. /* ===== Inline Function Start for 4.8.3. KDMTT16 ===== */
  13908. /**
  13909. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
  13910. * \brief KDMTT16 (SIMD Signed Saturating Double Multiply T16 x T16)
  13911. * \details
  13912. * **Type**: SIMD (RV64 only)
  13913. *
  13914. * **Syntax**:\n
  13915. * ~~~
  13916. * KDMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
  13917. * ~~~
  13918. *
  13919. * **Purpose**:\n
  13920. * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
  13921. * of the 32-bit chunks in registers and then double and saturate the Q31 results into the 32-bit chunks
  13922. * in the destination register. If saturation happens, an overflow flag OV will be set.
  13923. *
  13924. * **Description**:\n
  13925. * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
  13926. * or bottom 16-bit Q15 content of the 32-bit portions in Rs2. The Q30 results are then doubled and
  13927. * saturated into Q31 values. The Q31 values are then written into the 32-bit chunks in Rd. When both
  13928. * the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated to 0x7FFFFFFF
  13929. * and the overflow flag OV will be set.
  13930. *
  13931. * **Operations**:\n
  13932. * ~~~
  13933. * // KDMBB16: (x,y,z)=(0,0,0),(2,2,1)
  13934. * // KDMBT16: (x,y,z)=(0,1,0),(2,3,1)
  13935. * // KDMTT16: (x,y,z)=(1,1,0),(3,3,1)
  13936. * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y];
  13937. * If (0x8000 != aop[z] | 0x8000 != bop[z]) {
  13938. * Mresult[z] = aop[z] * bop[z];
  13939. * resQ31[z] = Mresult[z] << 1;
  13940. * } else {
  13941. * resQ31[z] = 0x7FFFFFFF;
  13942. * OV = 1;
  13943. * }
  13944. * Rd.W[z] = resQ31[z];
  13945. * ~~~
  13946. *
  13947. * \param [in] a unsigned long type of value stored in a
  13948. * \param [in] b unsigned long type of value stored in b
  13949. * \return value stored in unsigned long type
  13950. */
  13951. __STATIC_FORCEINLINE unsigned long __RV_KDMTT16(unsigned long a, unsigned long b)
  13952. {
  13953. unsigned long result;
  13954. __ASM volatile("kdmtt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  13955. return result;
  13956. }
  13957. /* ===== Inline Function End for 4.8.3. KDMTT16 ===== */
  13958. /* ===== Inline Function Start for 4.9.1. KDMABB16 ===== */
  13959. /**
  13960. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
  13961. * \brief KDMABB16 (SIMD Signed Saturating Double Multiply Addition B16 x B16)
  13962. * \details
  13963. * **Type**: SIMD (RV64 only)
  13964. *
  13965. * **Syntax**:\n
  13966. * ~~~
  13967. * KDMAxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
  13968. * ~~~
  13969. *
  13970. * **Purpose**:\n
  13971. * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
  13972. * of the 32-bit chunks in registers and then double and saturate the Q31 results, add the results with
  13973. * the values of the corresponding 32-bit chunks from the destination register and write the saturated
  13974. * addition results back into the corresponding 32-bit chunks of the destination register. If saturation
  13975. * happens, an overflow flag OV will be set.
  13976. *
  13977. * **Description**:\n
  13978. * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
  13979. * or bottom 16-bit Q15 content of the corresponding 32-bit portions in Rs2. The Q30 results are then
  13980. * doubled and saturated into Q31 values. The Q31 values are then added with the content of the
  13981. * corresponding 32-bit portions of Rd. If the addition results are beyond the Q31 number range (-2^31 <=
  13982. * Q31 <= 2^31-1), they are saturated to the range and the OV flag is set to 1. The results after saturation
  13983. * are written back to Rd.
  13984. * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be
  13985. * set.
  13986. *
  13987. * **Operations**:\n
  13988. * ~~~
  13989. * // KDMABB16: (x,y,z)=(0,0,0),(2,2,1)
  13990. * // KDMABT16: (x,y,z)=(0,1,0),(2,3,1)
  13991. * // KDMATT16: (x,y,z)=(1,1,0),(3,3,1)
  13992. * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y];
  13993. * If (0x8000 != aop[z] | 0x8000 != bop[z]) {
  13994. * Mresult[z] = aop[z] * bop[z];
  13995. * resQ31[z] = Mresult[z] << 1;
  13996. * } else {
  13997. * resQ31[z] = 0x7FFFFFFF;
  13998. * OV = 1;
  13999. * }
  14000. * resadd[z] = Rd.W[z] + resQ31[z];
  14001. * if (resadd[z] > (2^31)-1) {
  14002. * resadd[z] = (2^31)-1;
  14003. * OV = 1;
  14004. * } else if (resadd[z] < -2^31) {
  14005. * resadd[z] = -2^31;
  14006. * OV = 1;
  14007. * }
  14008. * Rd.W[z] = resadd[z];
  14009. * ~~~
  14010. *
  14011. * \param [in] t unsigned long type of value stored in t
  14012. * \param [in] a unsigned long type of value stored in a
  14013. * \param [in] b unsigned long type of value stored in b
  14014. * \return value stored in unsigned long type
  14015. */
  14016. __STATIC_FORCEINLINE unsigned long __RV_KDMABB16(unsigned long t, unsigned long a, unsigned long b)
  14017. {
  14018. __ASM volatile("kdmabb16 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  14019. return t;
  14020. }
  14021. /* ===== Inline Function End for 4.9.1. KDMABB16 ===== */
  14022. /* ===== Inline Function Start for 4.9.2. KDMABT16 ===== */
  14023. /**
  14024. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
  14025. * \brief KDMABT16 (SIMD Signed Saturating Double Multiply Addition B16 x T16)
  14026. * \details
  14027. * **Type**: SIMD (RV64 only)
  14028. *
  14029. * **Syntax**:\n
  14030. * ~~~
  14031. * KDMAxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
  14032. * ~~~
  14033. *
  14034. * **Purpose**:\n
  14035. * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
  14036. * of the 32-bit chunks in registers and then double and saturate the Q31 results, add the results with
  14037. * the values of the corresponding 32-bit chunks from the destination register and write the saturated
  14038. * addition results back into the corresponding 32-bit chunks of the destination register. If saturation
  14039. * happens, an overflow flag OV will be set.
  14040. *
  14041. * **Description**:\n
  14042. * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
  14043. * or bottom 16-bit Q15 content of the corresponding 32-bit portions in Rs2. The Q30 results are then
  14044. * doubled and saturated into Q31 values. The Q31 values are then added with the content of the
  14045. * corresponding 32-bit portions of Rd. If the addition results are beyond the Q31 number range (-2^31 <=
  14046. * Q31 <= 2^31-1), they are saturated to the range and the OV flag is set to 1. The results after saturation
  14047. * are written back to Rd.
  14048. * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be
  14049. * set.
  14050. *
  14051. * **Operations**:\n
  14052. * ~~~
  14053. * // KDMABB16: (x,y,z)=(0,0,0),(2,2,1)
  14054. * // KDMABT16: (x,y,z)=(0,1,0),(2,3,1)
  14055. * // KDMATT16: (x,y,z)=(1,1,0),(3,3,1)
  14056. * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y];
  14057. * If (0x8000 != aop[z] | 0x8000 != bop[z]) {
  14058. * Mresult[z] = aop[z] * bop[z];
  14059. * resQ31[z] = Mresult[z] << 1;
  14060. * } else {
  14061. * resQ31[z] = 0x7FFFFFFF;
  14062. * OV = 1;
  14063. * }
  14064. * resadd[z] = Rd.W[z] + resQ31[z];
  14065. * if (resadd[z] > (2^31)-1) {
  14066. * resadd[z] = (2^31)-1;
  14067. * OV = 1;
  14068. * } else if (resadd[z] < -2^31) {
  14069. * resadd[z] = -2^31;
  14070. * OV = 1;
  14071. * }
  14072. * Rd.W[z] = resadd[z];
  14073. * ~~~
  14074. *
  14075. * \param [in] t unsigned long type of value stored in t
  14076. * \param [in] a unsigned long type of value stored in a
  14077. * \param [in] b unsigned long type of value stored in b
  14078. * \return value stored in unsigned long type
  14079. */
  14080. __STATIC_FORCEINLINE unsigned long __RV_KDMABT16(unsigned long t, unsigned long a, unsigned long b)
  14081. {
  14082. __ASM volatile("kdmabt16 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  14083. return t;
  14084. }
  14085. /* ===== Inline Function End for 4.9.2. KDMABT16 ===== */
  14086. /* ===== Inline Function Start for 4.9.3. KDMATT16 ===== */
  14087. /**
  14088. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
  14089. * \brief KDMATT16 (SIMD Signed Saturating Double Multiply Addition T16 x T16)
  14090. * \details
  14091. * **Type**: SIMD (RV64 only)
  14092. *
  14093. * **Syntax**:\n
  14094. * ~~~
  14095. * KDMAxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
  14096. * ~~~
  14097. *
  14098. * **Purpose**:\n
  14099. * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
  14100. * of the 32-bit chunks in registers and then double and saturate the Q31 results, add the results with
  14101. * the values of the corresponding 32-bit chunks from the destination register and write the saturated
  14102. * addition results back into the corresponding 32-bit chunks of the destination register. If saturation
  14103. * happens, an overflow flag OV will be set.
  14104. *
  14105. * **Description**:\n
  14106. * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
  14107. * or bottom 16-bit Q15 content of the corresponding 32-bit portions in Rs2. The Q30 results are then
  14108. * doubled and saturated into Q31 values. The Q31 values are then added with the content of the
  14109. * corresponding 32-bit portions of Rd. If the addition results are beyond the Q31 number range (-2^31 <=
  14110. * Q31 <= 2^31-1), they are saturated to the range and the OV flag is set to 1. The results after saturation
  14111. * are written back to Rd.
  14112. * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be
  14113. * set.
  14114. *
  14115. * **Operations**:\n
  14116. * ~~~
  14117. * // KDMABB16: (x,y,z)=(0,0,0),(2,2,1)
  14118. * // KDMABT16: (x,y,z)=(0,1,0),(2,3,1)
  14119. * // KDMATT16: (x,y,z)=(1,1,0),(3,3,1)
  14120. * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y];
  14121. * If (0x8000 != aop[z] | 0x8000 != bop[z]) {
  14122. * Mresult[z] = aop[z] * bop[z];
  14123. * resQ31[z] = Mresult[z] << 1;
  14124. * } else {
  14125. * resQ31[z] = 0x7FFFFFFF;
  14126. * OV = 1;
  14127. * }
  14128. * resadd[z] = Rd.W[z] + resQ31[z];
  14129. * if (resadd[z] > (2^31)-1) {
  14130. * resadd[z] = (2^31)-1;
  14131. * OV = 1;
  14132. * } else if (resadd[z] < -2^31) {
  14133. * resadd[z] = -2^31;
  14134. * OV = 1;
  14135. * }
  14136. * Rd.W[z] = resadd[z];
  14137. * ~~~
  14138. *
  14139. * \param [in] t unsigned long type of value stored in t
  14140. * \param [in] a unsigned long type of value stored in a
  14141. * \param [in] b unsigned long type of value stored in b
  14142. * \return value stored in unsigned long type
  14143. */
  14144. __STATIC_FORCEINLINE unsigned long __RV_KDMATT16(unsigned long t, unsigned long a, unsigned long b)
  14145. {
  14146. __ASM volatile("kdmatt16 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  14147. return t;
  14148. }
  14149. /* ===== Inline Function End for 4.9.3. KDMATT16 ===== */
  14150. /* ===== Inline Function Start for 4.10.1. KHMBB16 ===== */
  14151. /**
  14152. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
  14153. * \brief KHMBB16 (SIMD Signed Saturating Half Multiply B16 x B16)
  14154. * \details
  14155. * **Type**: SIMD (RV64 Only)
  14156. *
  14157. * **Syntax**:\n
  14158. * ~~~
  14159. * KHMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
  14160. * ~~~
  14161. *
  14162. * **Purpose**:\n
  14163. * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
  14164. * of the 32-bit chunks in registers and then right-shift 15 bits to turn the Q30 results into Q15
  14165. * numbers again and saturate the Q15 results into the destination register. If saturation happens, an
  14166. * overflow flag OV will be set.
  14167. *
  14168. * **Description**:\n
  14169. * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
  14170. * or bottom 16-bit Q15 content of the 32-bit portion in Rs2. The Q30 results are then right-shifted 15-
  14171. * bits and saturated into Q15 values. The 32-bit Q15 values are then written into the 32-bit chunks in
  14172. * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated
  14173. * to 0x7FFF and the overflow flag OV will be set.
  14174. *
  14175. * **Operations**:\n
  14176. * ~~~
  14177. * // KHMBB16: (x,y,z)=(0,0,0),(2,2,1)
  14178. * // KHMBT16: (x,y,z)=(0,1,0),(2,3,1)
  14179. * // KHMTT16: (x,y,z)=(1,1,0),(3,3,1)
  14180. * aop = Rs1.H[x]; bop = Rs2.H[y];
  14181. * If (0x8000 != aop | 0x8000 != bop) {
  14182. * Mresult[31:0] = aop * bop;
  14183. * res[15:0] = Mresult[30:15];
  14184. * } else {
  14185. * res[15:0] = 0x7FFF;
  14186. * OV = 1;
  14187. * }
  14188. * Rd.W[z] = SE32(res[15:0]);
  14189. * ~~~
  14190. *
  14191. * \param [in] a unsigned long type of value stored in a
  14192. * \param [in] b unsigned long type of value stored in b
  14193. * \return value stored in unsigned long type
  14194. */
  14195. __STATIC_FORCEINLINE unsigned long __RV_KHMBB16(unsigned long a, unsigned long b)
  14196. {
  14197. unsigned long result;
  14198. __ASM volatile("khmbb16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  14199. return result;
  14200. }
  14201. /* ===== Inline Function End for 4.10.1. KHMBB16 ===== */
  14202. /* ===== Inline Function Start for 4.10.2. KHMBT16 ===== */
  14203. /**
  14204. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
  14205. * \brief KHMBT16 (SIMD Signed Saturating Half Multiply B16 x T16)
  14206. * \details
  14207. * **Type**: SIMD (RV64 Only)
  14208. *
  14209. * **Syntax**:\n
  14210. * ~~~
  14211. * KHMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
  14212. * ~~~
  14213. *
  14214. * **Purpose**:\n
  14215. * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
  14216. * of the 32-bit chunks in registers and then right-shift 15 bits to turn the Q30 results into Q15
  14217. * numbers again and saturate the Q15 results into the destination register. If saturation happens, an
  14218. * overflow flag OV will be set.
  14219. *
  14220. * **Description**:\n
  14221. * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
  14222. * or bottom 16-bit Q15 content of the 32-bit portion in Rs2. The Q30 results are then right-shifted 15-
  14223. * bits and saturated into Q15 values. The 32-bit Q15 values are then written into the 32-bit chunks in
  14224. * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated
  14225. * to 0x7FFF and the overflow flag OV will be set.
  14226. *
  14227. * **Operations**:\n
  14228. * ~~~
  14229. * // KHMBB16: (x,y,z)=(0,0,0),(2,2,1)
  14230. * // KHMBT16: (x,y,z)=(0,1,0),(2,3,1)
  14231. * // KHMTT16: (x,y,z)=(1,1,0),(3,3,1)
  14232. * aop = Rs1.H[x]; bop = Rs2.H[y];
  14233. * If (0x8000 != aop | 0x8000 != bop) {
  14234. * Mresult[31:0] = aop * bop;
  14235. * res[15:0] = Mresult[30:15];
  14236. * } else {
  14237. * res[15:0] = 0x7FFF;
  14238. * OV = 1;
  14239. * }
  14240. * Rd.W[z] = SE32(res[15:0]);
  14241. * ~~~
  14242. *
  14243. * \param [in] a unsigned long type of value stored in a
  14244. * \param [in] b unsigned long type of value stored in b
  14245. * \return value stored in unsigned long type
  14246. */
  14247. __STATIC_FORCEINLINE unsigned long __RV_KHMBT16(unsigned long a, unsigned long b)
  14248. {
  14249. unsigned long result;
  14250. __ASM volatile("khmbt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  14251. return result;
  14252. }
  14253. /* ===== Inline Function End for 4.10.2. KHMBT16 ===== */
  14254. /* ===== Inline Function Start for 4.10.3. KHMTT16 ===== */
  14255. /**
  14256. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
  14257. * \brief KHMTT16 (SIMD Signed Saturating Half Multiply T16 x T16)
  14258. * \details
  14259. * **Type**: SIMD (RV64 Only)
  14260. *
  14261. * **Syntax**:\n
  14262. * ~~~
  14263. * KHMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
  14264. * ~~~
  14265. *
  14266. * **Purpose**:\n
  14267. * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
  14268. * of the 32-bit chunks in registers and then right-shift 15 bits to turn the Q30 results into Q15
  14269. * numbers again and saturate the Q15 results into the destination register. If saturation happens, an
  14270. * overflow flag OV will be set.
  14271. *
  14272. * **Description**:\n
  14273. * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
  14274. * or bottom 16-bit Q15 content of the 32-bit portion in Rs2. The Q30 results are then right-shifted 15-
  14275. * bits and saturated into Q15 values. The 32-bit Q15 values are then written into the 32-bit chunks in
  14276. * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated
  14277. * to 0x7FFF and the overflow flag OV will be set.
  14278. *
  14279. * **Operations**:\n
  14280. * ~~~
  14281. * // KHMBB16: (x,y,z)=(0,0,0),(2,2,1)
  14282. * // KHMBT16: (x,y,z)=(0,1,0),(2,3,1)
  14283. * // KHMTT16: (x,y,z)=(1,1,0),(3,3,1)
  14284. * aop = Rs1.H[x]; bop = Rs2.H[y];
  14285. * If (0x8000 != aop | 0x8000 != bop) {
  14286. * Mresult[31:0] = aop * bop;
  14287. * res[15:0] = Mresult[30:15];
  14288. * } else {
  14289. * res[15:0] = 0x7FFF;
  14290. * OV = 1;
  14291. * }
  14292. * Rd.W[z] = SE32(res[15:0]);
  14293. * ~~~
  14294. *
  14295. * \param [in] a unsigned long type of value stored in a
  14296. * \param [in] b unsigned long type of value stored in b
  14297. * \return value stored in unsigned long type
  14298. */
  14299. __STATIC_FORCEINLINE unsigned long __RV_KHMTT16(unsigned long a, unsigned long b)
  14300. {
  14301. unsigned long result;
  14302. __ASM volatile("khmtt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  14303. return result;
  14304. }
  14305. /* ===== Inline Function End for 4.10.3. KHMTT16 ===== */
  14306. /* ===== Inline Function Start for 4.11.1. KMABB32 ===== */
  14307. /**
  14308. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT_ADD
  14309. * \brief KMABB32 (Saturating Signed Multiply Bottom Words & Add)
  14310. * \details
  14311. * **Type**: DSP (RV64 Only)
  14312. *
  14313. * **Syntax**:\n
  14314. * ~~~
  14315. * KMABB32 Rd, Rs1, Rs2
  14316. * KMABT32 Rd, Rs1, Rs2
  14317. * KMATT32 Rd, Rs1, Rs2
  14318. * ~~~
  14319. *
  14320. * **Purpose**:\n
  14321. * Multiply the signed 32-bit element in a register with the 32-bit element in another register
  14322. * and add the result to the content of 64-bit data in the third register. The addition result may be
  14323. * saturated and is written to the third register.
  14324. * * KMABB32: rd + bottom*bottom
  14325. * * KMABT32: rd + bottom*top
  14326. * * KMATT32: rd + top*top
  14327. *
  14328. * **Description**:\n
  14329. * For the `KMABB32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit
  14330. * element in Rs2.
  14331. * For the `KMABT32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit
  14332. * element in Rs2.
  14333. * For the `KMATT32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit
  14334. * element in Rs2.
  14335. * The multiplication result is added to the content of 64-bit data in Rd. If the addition result is beyond
  14336. * the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The
  14337. * result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed
  14338. * integers.
  14339. *
  14340. * **Operations**:\n
  14341. * ~~~
  14342. * res = Rd + (Rs1.W[0] * Rs2.W[0]); // KMABB32
  14343. * res = Rd + (Rs1.W[0] * Rs2.W[1]); // KMABT32
  14344. * res = Rd + (Rs1.W[1] * Rs2.W[1]); // KMATT32
  14345. * if (res > (2^63)-1) {
  14346. * res = (2^63)-1;
  14347. * OV = 1;
  14348. * } else if (res < -2^63) {
  14349. * res = -2^63;
  14350. * OV = 1;
  14351. * }
  14352. * Rd = res;
  14353. * // Exceptions: None
  14354. * ~~~
  14355. *
  14356. * \param [in] t long type of value stored in t
  14357. * \param [in] a unsigned long type of value stored in a
  14358. * \param [in] b unsigned long type of value stored in b
  14359. * \return value stored in long type
  14360. */
  14361. __STATIC_FORCEINLINE long __RV_KMABB32(long t, unsigned long a, unsigned long b)
  14362. {
  14363. __ASM volatile("kmabb32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  14364. return t;
  14365. }
  14366. /* ===== Inline Function End for 4.11.1. KMABB32 ===== */
  14367. /* ===== Inline Function Start for 4.11.2. KMABT32 ===== */
  14368. /**
  14369. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT_ADD
  14370. * \brief KMABT32 (Saturating Signed Multiply Bottom & Top Words & Add)
  14371. * \details
  14372. * **Type**: DSP (RV64 Only)
  14373. *
  14374. * **Syntax**:\n
  14375. * ~~~
  14376. * KMABB32 Rd, Rs1, Rs2
  14377. * KMABT32 Rd, Rs1, Rs2
  14378. * KMATT32 Rd, Rs1, Rs2
  14379. * ~~~
  14380. *
  14381. * **Purpose**:\n
  14382. * Multiply the signed 32-bit element in a register with the 32-bit element in another register
  14383. * and add the result to the content of 64-bit data in the third register. The addition result may be
  14384. * saturated and is written to the third register.
  14385. * * KMABB32: rd + bottom*bottom
  14386. * * KMABT32: rd + bottom*top
  14387. * * KMATT32: rd + top*top
  14388. *
  14389. * **Description**:\n
  14390. * For the `KMABB32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit
  14391. * element in Rs2.
  14392. * For the `KMABT32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit
  14393. * element in Rs2.
  14394. * For the `KMATT32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit
  14395. * element in Rs2.
  14396. * The multiplication result is added to the content of 64-bit data in Rd. If the addition result is beyond
  14397. * the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The
  14398. * result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed
  14399. * integers.
  14400. *
  14401. * **Operations**:\n
  14402. * ~~~
  14403. * res = Rd + (Rs1.W[0] * Rs2.W[0]); // KMABB32
  14404. * res = Rd + (Rs1.W[0] * Rs2.W[1]); // KMABT32
  14405. * res = Rd + (Rs1.W[1] * Rs2.W[1]); // KMATT32
  14406. * if (res > (2^63)-1) {
  14407. * res = (2^63)-1;
  14408. * OV = 1;
  14409. * } else if (res < -2^63) {
  14410. * res = -2^63;
  14411. * OV = 1;
  14412. * }
  14413. * Rd = res;
  14414. * // Exceptions: None
  14415. * ~~~
  14416. *
  14417. * \param [in] t long type of value stored in t
  14418. * \param [in] a unsigned long type of value stored in a
  14419. * \param [in] b unsigned long type of value stored in b
  14420. * \return value stored in long type
  14421. */
  14422. __STATIC_FORCEINLINE long __RV_KMABT32(long t, unsigned long a, unsigned long b)
  14423. {
  14424. __ASM volatile("kmabt32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  14425. return t;
  14426. }
  14427. /* ===== Inline Function End for 4.11.2. KMABT32 ===== */
  14428. /* ===== Inline Function Start for 4.11.3. KMATT32 ===== */
  14429. /**
  14430. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT_ADD
  14431. * \brief KMATT32 (Saturating Signed Multiply Top Words & Add)
  14432. * \details
  14433. * **Type**: DSP (RV64 Only)
  14434. *
  14435. * **Syntax**:\n
  14436. * ~~~
  14437. * KMABB32 Rd, Rs1, Rs2
  14438. * KMABT32 Rd, Rs1, Rs2
  14439. * KMATT32 Rd, Rs1, Rs2
  14440. * ~~~
  14441. *
  14442. * **Purpose**:\n
  14443. * Multiply the signed 32-bit element in a register with the 32-bit element in another register
  14444. * and add the result to the content of 64-bit data in the third register. The addition result may be
  14445. * saturated and is written to the third register.
  14446. * * KMABB32: rd + bottom*bottom
  14447. * * KMABT32: rd + bottom*top
  14448. * * KMATT32: rd + top*top
  14449. *
  14450. * **Description**:\n
  14451. * For the `KMABB32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit
  14452. * element in Rs2.
  14453. * For the `KMABT32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit
  14454. * element in Rs2.
  14455. * For the `KMATT32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit
  14456. * element in Rs2.
  14457. * The multiplication result is added to the content of 64-bit data in Rd. If the addition result is beyond
  14458. * the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The
  14459. * result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed
  14460. * integers.
  14461. *
  14462. * **Operations**:\n
  14463. * ~~~
  14464. * res = Rd + (Rs1.W[0] * Rs2.W[0]); // KMABB32
  14465. * res = Rd + (Rs1.W[0] * Rs2.W[1]); // KMABT32
  14466. * res = Rd + (Rs1.W[1] * Rs2.W[1]); // KMATT32
  14467. * if (res > (2^63)-1) {
  14468. * res = (2^63)-1;
  14469. * OV = 1;
  14470. * } else if (res < -2^63) {
  14471. * res = -2^63;
  14472. * OV = 1;
  14473. * }
  14474. * Rd = res;
  14475. * // Exceptions: None
  14476. * ~~~
  14477. *
  14478. * \param [in] t long type of value stored in t
  14479. * \param [in] a unsigned long type of value stored in a
  14480. * \param [in] b unsigned long type of value stored in b
  14481. * \return value stored in long type
  14482. */
  14483. __STATIC_FORCEINLINE long __RV_KMATT32(long t, unsigned long a, unsigned long b)
  14484. {
  14485. __ASM volatile("kmatt32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  14486. return t;
  14487. }
  14488. /* ===== Inline Function End for 4.11.3. KMATT32 ===== */
  14489. /* ===== Inline Function Start for 4.12.1. KMADA32 ===== */
  14490. /**
  14491. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
  14492. * \brief KMADA32 (Saturating Signed Multiply Two Words and Two Adds)
  14493. * \details
  14494. * **Type**: DSP (RV64 Only)
  14495. *
  14496. * **Syntax**:\n
  14497. * ~~~
  14498. * KMADA32 Rd, Rs1, Rs2
  14499. * KMAXDA32 Rd, Rs1, Rs2
  14500. * ~~~
  14501. *
  14502. * **Purpose**:\n
  14503. * Do two signed 32-bit multiplications from 32-bit data in two registers; and then adds the
  14504. * two 64-bit results and 64-bit data in a third register together. The addition result may be saturated.
  14505. * * KMADA32: rd + top*top + bottom*bottom
  14506. * * KMAXDA32: rd + top*bottom + bottom*top
  14507. *
  14508. * **Description**:\n
  14509. * For the `KMADA32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-
  14510. * bit element in Rs2 and then adds the result to the result of multiplying the top 32-bit element in Rs1
  14511. * with the top 32-bit element in Rs2. It is actually an alias of the `KMAR64` instruction.
  14512. * For the `KMAXDA32` instruction, it multiplies the top 32-bit element in Rs1 with the bottom 32-bit
  14513. * element in Rs2 and then adds the result to the result of multiplying the bottom 32-bit element in Rs1
  14514. * with the top 32-bit element in Rs2.
  14515. * The result is added to the content of 64-bit data in Rd. If the addition result is beyond the Q63
  14516. * number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The 64-bit
  14517. * result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
  14518. *
  14519. * **Operations**:\n
  14520. * ~~~
  14521. * res = Rd + (Rs1.W[1] * Rs2.W[1]) + (Rs1.W[0] * Rs2.W[0]); // KMADA32
  14522. * res = Rd + (Rs1.W[1] * Rs2.W[0]) + (Rs1.W[0] * Rs2.W[1]); // KMAXDA32
  14523. * if (res > (2^63)-1) {
  14524. * res = (2^63)-1;
  14525. * OV = 1;
  14526. * } else if (res < -2^63) {
  14527. * res = -2^63;
  14528. * OV = 1;
  14529. * }
  14530. * Rd = res;
  14531. * ~~~
  14532. *
  14533. * \param [in] t long type of value stored in t
  14534. * \param [in] a unsigned long type of value stored in a
  14535. * \param [in] b unsigned long type of value stored in b
  14536. * \return value stored in long type
  14537. */
  14538. __STATIC_FORCEINLINE long __RV_KMADA32(long t, unsigned long a, unsigned long b)
  14539. {
  /* t is a read-write operand ("+r"): it enters as the Rd accumulator and leaves as
   * rd + top*top + bottom*bottom (saturated), computed from the 32-bit halves of
   * a (Rs1) and b (Rs2).
   * NOTE: the documented OV update on saturation is not expressed in the operand
   * list - presumably why the asm is volatile. */
  14540. __ASM volatile("kmada32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  14541. return t;
  14542. }
  14543. /* ===== Inline Function End for 4.12.1. KMADA32 ===== */
  14544. /* ===== Inline Function Start for 4.12.2. KMAXDA32 ===== */
  14545. /**
  14546. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
  14547. * \brief KMAXDA32 (Saturating Signed Crossed Multiply Two Words and Two Adds)
  14548. * \details
  14549. * **Type**: DSP (RV64 Only)
  14550. *
  14551. * **Syntax**:\n
  14552. * ~~~
  14553. * KMADA32 Rd, Rs1, Rs2
  14554. * KMAXDA32 Rd, Rs1, Rs2
  14555. * ~~~
  14556. *
  14557. * **Purpose**:\n
  14558. * Do two signed 32-bit multiplications from 32-bit data in two registers; and then adds the
  14559. * two 64-bit results and 64-bit data in a third register together. The addition result may be saturated.
  14560. * * KMADA32: rd + top*top + bottom*bottom
  14561. * * KMAXDA32: rd + top*bottom + bottom*top
  14562. *
  14563. * **Description**:\n
  14564. * For the `KMADA32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-
  14565. * bit element in Rs2 and then adds the result to the result of multiplying the top 32-bit element in Rs1
  14566. * with the top 32-bit element in Rs2. It is actually an alias of the `KMAR64` instruction.
  14567. * For the `KMAXDA32` instruction, it multiplies the top 32-bit element in Rs1 with the bottom 32-bit
  14568. * element in Rs2 and then adds the result to the result of multiplying the bottom 32-bit element in Rs1
  14569. * with the top 32-bit element in Rs2.
  14570. * The result is added to the content of 64-bit data in Rd. If the addition result is beyond the Q63
  14571. * number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The 64-bit
  14572. * result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
  14573. *
  14574. * **Operations**:\n
  14575. * ~~~
  14576. * res = Rd + (Rs1.W[1] * Rs2.W[1]) + (Rs1.W[0] * Rs2.W[0]); // KMADA32
  14577. * res = Rd + (Rs1.W[1] * Rs2.W[0]) + (Rs1.W[0] * Rs2.W[1]); // KMAXDA32
  14578. * if (res > (2^63)-1) {
  14579. * res = (2^63)-1;
  14580. * OV = 1;
  14581. * } else if (res < -2^63) {
  14582. * res = -2^63;
  14583. * OV = 1;
  14584. * }
  14585. * Rd = res;
  14586. * ~~~
  14587. *
  14588. * \param [in] t long type of value stored in t
  14589. * \param [in] a unsigned long type of value stored in a
  14590. * \param [in] b unsigned long type of value stored in b
  14591. * \return value stored in long type
  14592. */
  14593. __STATIC_FORCEINLINE long __RV_KMAXDA32(long t, unsigned long a, unsigned long b)
  14594. {
  /* Crossed variant: t ("+r", both input accumulator and output) becomes
   * rd + top*bottom + bottom*top of a (Rs1) and b (Rs2), saturated to Q63.
   * NOTE: the documented OV update on saturation is not expressed in the operand
   * list - presumably why the asm is volatile. */
  14595. __ASM volatile("kmaxda32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  14596. return t;
  14597. }
  14598. /* ===== Inline Function End for 4.12.2. KMAXDA32 ===== */
  14599. /* ===== Inline Function Start for 4.13.1. KMDA32 ===== */
  14600. /**
  14601. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
  14602. * \brief KMDA32 (Signed Multiply Two Words and Add)
  14603. * \details
  14604. * **Type**: DSP (RV64 Only)
  14605. *
  14606. * **Syntax**:\n
  14607. * ~~~
  14608. * KMDA32 Rd, Rs1, Rs2
  14609. * KMXDA32 Rd, Rs1, Rs2
  14610. * ~~~
  14611. *
  14612. * **Purpose**:\n
  14613. * Do two signed 32-bit multiplications from the 32-bit element of two registers; and then
  14614. * adds the two 64-bit results together. The addition result may be saturated.
  14615. * * KMDA32: top*top + bottom*bottom
  14616. * * KMXDA32: top*bottom + bottom*top
  14617. *
  14618. * **Description**:\n
  14619. * For the `KMDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
  14620. * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1
  14621. * with the top 32-bit element of Rs2.
  14622. * For the `KMXDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
  14623. * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1
  14624. * with the bottom 32-bit element of Rs2.
  14625. * The addition result is checked for saturation. If saturation happens, the result is saturated to 2^63-1.
  14626. * The final result is written to Rd. The 32-bit contents are treated as signed integers.
  14627. *
  14628. * **Operations**:\n
  14629. * ~~~
  14630. * if ((Rs1 != 0x8000000080000000) or (Rs2 != 0x8000000080000000)) {
  14631. * Rd = (Rs1.W[1] * Rs2.W[1]) + (Rs1.W[0] * Rs2.W[0]); // KMDA32
  14632. * Rd = (Rs1.W[1] * Rs2.W[0]) + (Rs1.W[0] * Rs2.W[1]); // KMXDA32
  14633. * } else {
  14634. * Rd = 0x7fffffffffffffff;
  14635. * OV = 1;
  14636. * }
  14637. * ~~~
  14638. *
  14639. * \param [in] a unsigned long type of value stored in a
  14640. * \param [in] b unsigned long type of value stored in b
  14641. * \return value stored in long type
  14642. */
  14643. __STATIC_FORCEINLINE long __RV_KMDA32(unsigned long a, unsigned long b)
  14644. {
  /* Receives Rd; written only by the asm below ("=r"), never read beforehand. */
  14645. long result;
  /* No accumulator here: Rd = top*top + bottom*bottom of a (Rs1) and b (Rs2).
   * NOTE: per the Operations listing the saturating case also sets OV, which is
   * not among the asm operands - presumably why the asm is volatile. */
  14646. __ASM volatile("kmda32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  14647. return result;
  14648. }
  14649. /* ===== Inline Function End for 4.13.1. KMDA32 ===== */
  14650. /* ===== Inline Function Start for 4.13.2. KMXDA32 ===== */
  14651. /**
  14652. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
  14653. * \brief KMXDA32 (Signed Crossed Multiply Two Words and Add)
  14654. * \details
  14655. * **Type**: DSP (RV64 Only)
  14656. *
  14657. * **Syntax**:\n
  14658. * ~~~
  14659. * KMDA32 Rd, Rs1, Rs2
  14660. * KMXDA32 Rd, Rs1, Rs2
  14661. * ~~~
  14662. *
  14663. * **Purpose**:\n
  14664. * Do two signed 32-bit multiplications from the 32-bit element of two registers; and then
  14665. * adds the two 64-bit results together. The addition result may be saturated.
  14666. * * KMDA32: top*top + bottom*bottom
  14667. * * KMXDA32: top*bottom + bottom*top
  14668. *
  14669. * **Description**:\n
  14670. * For the `KMDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
  14671. * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1
  14672. * with the top 32-bit element of Rs2.
  14673. * For the `KMXDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
  14674. * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1
  14675. * with the bottom 32-bit element of Rs2.
  14676. * The addition result is checked for saturation. If saturation happens, the result is saturated to 2^63-1.
  14677. * The final result is written to Rd. The 32-bit contents are treated as signed integers.
  14678. *
  14679. * **Operations**:\n
  14680. * ~~~
  14681. * if ((Rs1 != 0x8000000080000000) or (Rs2 != 0x8000000080000000)) {
  14682. * Rd = (Rs1.W[1] * Rs2.W[1]) + (Rs1.W[0] * Rs2.W[0]); // KMDA32
  14683. * Rd = (Rs1.W[1] * Rs2.W[0]) + (Rs1.W[0] * Rs2.W[1]); // KMXDA32
  14684. * } else {
  14685. * Rd = 0x7fffffffffffffff;
  14686. * OV = 1;
  14687. * }
  14688. * ~~~
  14689. *
  14690. * \param [in] a unsigned long type of value stored in a
  14691. * \param [in] b unsigned long type of value stored in b
  14692. * \return value stored in long type
  14693. */
  14694. __STATIC_FORCEINLINE long __RV_KMXDA32(unsigned long a, unsigned long b)
  14695. {
  /* Receives Rd; written only by the asm below ("=r"), never read beforehand. */
  14696. long result;
  /* Crossed form: Rd = top*bottom + bottom*top of a (Rs1) and b (Rs2).
   * NOTE: per the Operations listing the saturating case also sets OV, which is
   * not among the asm operands - presumably why the asm is volatile. */
  14697. __ASM volatile("kmxda32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  14698. return result;
  14699. }
  14700. /* ===== Inline Function End for 4.13.2. KMXDA32 ===== */
  14701. /* ===== Inline Function Start for 4.14.1. KMADS32 ===== */
  14702. /**
  14703. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
  14704. * \brief KMADS32 (Saturating Signed Multiply Two Words & Subtract & Add)
  14705. * \details
  14706. * **Type**: DSP (RV64 Only)
  14707. *
  14708. * **Syntax**:\n
  14709. * ~~~
  14710. * KMADS32 Rd, Rs1, Rs2
  14711. * KMADRS32 Rd, Rs1, Rs2
  14712. * KMAXDS32 Rd, Rs1, Rs2
  14713. * ~~~
  14714. *
  14715. * **Purpose**:\n
  14716. * Do two signed 32-bit multiplications from 32-bit elements in two registers; and then
  14717. * perform a subtraction operation between the two 64-bit results. Then add the subtraction result to
  14718. * 64-bit data in a third register. The addition result may be saturated.
  14719. * * KMADS32: rd + (top*top - bottom*bottom)
  14720. * * KMADRS32: rd + (bottom*bottom - top*top)
  14721. * * KMAXDS32: rd + (top*bottom - bottom*top)
  14722. *
  14723. * **Description**:\n
  14724. * For the `KMADS32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit
  14725. * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in
  14726. * Rs1 with the top 32-bit element in Rs2.
  14727. * For the `KMADRS32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit
  14728. * element in Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit
  14729. * element in Rs1 with the bottom 32-bit element in Rs2.
  14730. * For the `KMAXDS32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit
  14731. * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in
  14732. * Rs1 with the bottom 32-bit element in Rs2.
  14733. * The subtraction result is then added to the content of 64-bit data in Rd. If the addition result is
  14734. * beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to
  14735. * 1. The 64-bit result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated
  14736. * as signed integers.
  14737. *
  14738. * **Operations**:\n
  14739. * ~~~
  14740. * res = Rd + (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // KMADS32
  14741. * res = Rd + (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // KMADRS32
  14742. * res = Rd + (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // KMAXDS32
  14743. * if (res > (2^63)-1) {
  14744. * res = (2^63)-1;
  14745. * OV = 1;
  14746. * } else if (res < -2^63) {
  14747. * res = -2^63;
  14748. * OV = 1;
  14749. * }
  14750. * Rd = res;
  14751. * ~~~
  14752. *
  14753. * \param [in] t long type of value stored in t
  14754. * \param [in] a unsigned long type of value stored in a
  14755. * \param [in] b unsigned long type of value stored in b
  14756. * \return value stored in long type
  14757. */
  14758. __STATIC_FORCEINLINE long __RV_KMADS32(long t, unsigned long a, unsigned long b)
  14759. {
  /* t is tied to Rd with "+r": read as the accumulator, then replaced by
   * rd + (top*top - bottom*bottom) of a (Rs1) and b (Rs2), saturated to Q63.
   * NOTE: the documented OV update on saturation is not expressed in the operand
   * list - presumably why the asm is volatile. */
  14760. __ASM volatile("kmads32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  14761. return t;
  14762. }
  14763. /* ===== Inline Function End for 4.14.1. KMADS32 ===== */
  14764. /* ===== Inline Function Start for 4.14.2. KMADRS32 ===== */
  14765. /**
  14766. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
  14767. * \brief KMADRS32 (Saturating Signed Multiply Two Words & Reverse Subtract & Add)
  14768. * \details
  14769. * **Type**: DSP (RV64 Only)
  14770. *
  14771. * **Syntax**:\n
  14772. * ~~~
  14773. * KMADS32 Rd, Rs1, Rs2
  14774. * KMADRS32 Rd, Rs1, Rs2
  14775. * KMAXDS32 Rd, Rs1, Rs2
  14776. * ~~~
  14777. *
  14778. * **Purpose**:\n
  14779. * Do two signed 32-bit multiplications from 32-bit elements in two registers; and then
  14780. * perform a subtraction operation between the two 64-bit results. Then add the subtraction result to
  14781. * 64-bit data in a third register. The addition result may be saturated.
  14782. * * KMADS32: rd + (top*top - bottom*bottom)
  14783. * * KMADRS32: rd + (bottom*bottom - top*top)
  14784. * * KMAXDS32: rd + (top*bottom - bottom*top)
  14785. *
  14786. * **Description**:\n
  14787. * For the `KMADS32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit
  14788. * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in
  14789. * Rs1 with the top 32-bit element in Rs2.
  14790. * For the `KMADRS32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit
  14791. * element in Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit
  14792. * element in Rs1 with the bottom 32-bit element in Rs2.
  14793. * For the `KMAXDS32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit
  14794. * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in
  14795. * Rs1 with the bottom 32-bit element in Rs2.
  14796. * The subtraction result is then added to the content of 64-bit data in Rd. If the addition result is
  14797. * beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to
  14798. * 1. The 64-bit result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated
  14799. * as signed integers.
  14800. *
  14801. * **Operations**:\n
  14802. * ~~~
  14803. * res = Rd + (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // KMADS32
  14804. * res = Rd + (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // KMADRS32
  14805. * res = Rd + (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // KMAXDS32
  14806. * if (res > (2^63)-1) {
  14807. * res = (2^63)-1;
  14808. * OV = 1;
  14809. * } else if (res < -2^63) {
  14810. * res = -2^63;
  14811. * OV = 1;
  14812. * }
  14813. * Rd = res;
  14814. * ~~~
  14815. *
  14816. * \param [in] t long type of value stored in t
  14817. * \param [in] a unsigned long type of value stored in a
  14818. * \param [in] b unsigned long type of value stored in b
  14819. * \return value stored in long type
  14820. */
  14821. __STATIC_FORCEINLINE long __RV_KMADRS32(long t, unsigned long a, unsigned long b)
  14822. {
  /* Reverse-subtract form: t ("+r", accumulator in / result out) becomes
   * rd + (bottom*bottom - top*top) of a (Rs1) and b (Rs2), saturated to Q63.
   * NOTE: the documented OV update on saturation is not expressed in the operand
   * list - presumably why the asm is volatile. */
  14823. __ASM volatile("kmadrs32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  14824. return t;
  14825. }
  14826. /* ===== Inline Function End for 4.14.2. KMADRS32 ===== */
  14827. /* ===== Inline Function Start for 4.14.3. KMAXDS32 ===== */
  14828. /**
  14829. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
  14830. * \brief KMAXDS32 (Saturating Signed Crossed Multiply Two Words & Subtract & Add)
  14831. * \details
  14832. * **Type**: DSP (RV64 Only)
  14833. *
  14834. * **Syntax**:\n
  14835. * ~~~
  14836. * KMADS32 Rd, Rs1, Rs2
  14837. * KMADRS32 Rd, Rs1, Rs2
  14838. * KMAXDS32 Rd, Rs1, Rs2
  14839. * ~~~
  14840. *
  14841. * **Purpose**:\n
  14842. * Do two signed 32-bit multiplications from 32-bit elements in two registers; and then
  14843. * perform a subtraction operation between the two 64-bit results. Then add the subtraction result to
  14844. * 64-bit data in a third register. The addition result may be saturated.
  14845. * * KMADS32: rd + (top*top - bottom*bottom)
  14846. * * KMADRS32: rd + (bottom*bottom - top*top)
  14847. * * KMAXDS32: rd + (top*bottom - bottom*top)
  14848. *
  14849. * **Description**:\n
  14850. * For the `KMADS32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit
  14851. * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in
  14852. * Rs1 with the top 32-bit element in Rs2.
  14853. * For the `KMADRS32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit
  14854. * element in Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit
  14855. * element in Rs1 with the bottom 32-bit element in Rs2.
  14856. * For the `KMAXDS32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit
  14857. * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in
  14858. * Rs1 with the bottom 32-bit element in Rs2.
  14859. * The subtraction result is then added to the content of 64-bit data in Rd. If the addition result is
  14860. * beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to
  14861. * 1. The 64-bit result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated
  14862. * as signed integers.
  14863. *
  14864. * **Operations**:\n
  14865. * ~~~
  14866. * res = Rd + (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // KMADS32
  14867. * res = Rd + (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // KMADRS32
  14868. * res = Rd + (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // KMAXDS32
  14869. * if (res > (2^63)-1) {
  14870. * res = (2^63)-1;
  14871. * OV = 1;
  14872. * } else if (res < -2^63) {
  14873. * res = -2^63;
  14874. * OV = 1;
  14875. * }
  14876. * Rd = res;
  14877. * ~~~
  14878. *
  14879. * \param [in] t long type of value stored in t
  14880. * \param [in] a unsigned long type of value stored in a
  14881. * \param [in] b unsigned long type of value stored in b
  14882. * \return value stored in long type
  14883. */
  14884. __STATIC_FORCEINLINE long __RV_KMAXDS32(long t, unsigned long a, unsigned long b)
  14885. {
  /* Crossed subtract form: t ("+r", accumulator in / result out) becomes
   * rd + (top*bottom - bottom*top), i.e. Rs1.W[1]*Rs2.W[0] - Rs1.W[0]*Rs2.W[1]
   * added to Rd, with a = Rs1 and b = Rs2, saturated to Q63.
   * NOTE: the documented OV update on saturation is not expressed in the operand
   * list - presumably why the asm is volatile. */
  14886. __ASM volatile("kmaxds32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  14887. return t;
  14888. }
  14889. /* ===== Inline Function End for 4.14.3. KMAXDS32 ===== */
  14890. /* ===== Inline Function Start for 4.15.1. KMSDA32 ===== */
  14891. /**
  14892. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
  14893. * \brief KMSDA32 (Saturating Signed Multiply Two Words & Add & Subtract)
  14894. * \details
  14895. * **Type**: DSP (RV64 Only)
  14896. *
  14897. * **Syntax**:\n
  14898. * ~~~
  14899. * KMSDA32 Rd, Rs1, Rs2
  14900. * KMSXDA32 Rd, Rs1, Rs2
  14901. * ~~~
  14902. *
  14903. * **Purpose**:\n
  14904. * Do two signed 32-bit multiplications from the 32-bit element of two registers; and then
  14905. * subtracts the two 64-bit results from a third register. The subtraction result may be saturated.
  14906. * * KMSDA32: rd - top*top - bottom*bottom
  14907. * * KMSXDA32: rd - top*bottom - bottom*top
  14908. *
  14909. * **Description**:\n
  14910. * For the `KMSDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
  14911. * element of Rs2 and multiplies the top 32-bit element of Rs1 with the top 32-bit element of Rs2.
  14912. * For the `KMSXDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
  14913. * element of Rs2 and multiplies the top 32-bit element of Rs1 with the bottom 32-bit element of Rs2.
  14914. * The two 64-bit multiplication results are then subtracted from the content of Rd. If the subtraction
  14915. * result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit
  14916. * is set to 1. The result after saturation is written to Rd. The 32-bit contents are treated as signed
  14917. * integers.
  14918. *
  14919. * **Operations**:\n
  14920. * ~~~
  14921. * res = Rd - (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // KMSDA32
  14922. * res = Rd - (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // KMSXDA32
  14923. * if (res > (2^63)-1) {
  14924. * res = (2^63)-1;
  14925. * OV = 1;
  14926. * } else if (res < -2^63) {
  14927. * res = -2^63;
  14928. * OV = 1;
  14929. * }
  14930. * Rd = res;
  14931. * ~~~
  14932. *
  14933. * \param [in] t long type of value stored in t
  14934. * \param [in] a unsigned long type of value stored in a
  14935. * \param [in] b unsigned long type of value stored in b
  14936. * \return value stored in long type
  14937. */
  14938. __STATIC_FORCEINLINE long __RV_KMSDA32(long t, unsigned long a, unsigned long b)
  14939. {
  /* t is the "+r" operand: it provides Rd on entry and is overwritten with
   * rd - top*top - bottom*bottom of a (Rs1) and b (Rs2), saturated to Q63.
   * NOTE: the documented OV update on saturation is not expressed in the operand
   * list - presumably why the asm is volatile. */
  14940. __ASM volatile("kmsda32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  14941. return t;
  14942. }
  14943. /* ===== Inline Function End for 4.15.1. KMSDA32 ===== */
  14944. /* ===== Inline Function Start for 4.15.2. KMSXDA32 ===== */
  14945. /**
  14946. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
  14947. * \brief KMSXDA32 (Saturating Signed Crossed Multiply Two Words & Add & Subtract)
  14948. * \details
  14949. * **Type**: DSP (RV64 Only)
  14950. *
  14951. * **Syntax**:\n
  14952. * ~~~
  14953. * KMSDA32 Rd, Rs1, Rs2
  14954. * KMSXDA32 Rd, Rs1, Rs2
  14955. * ~~~
  14956. *
  14957. * **Purpose**:\n
  14958. * Do two signed 32-bit multiplications from the 32-bit element of two registers; and then
  14959. * subtracts the two 64-bit results from a third register. The subtraction result may be saturated.
  14960. * * KMSDA32: rd - top*top - bottom*bottom
  14961. * * KMSXDA32: rd - top*bottom - bottom*top
  14962. *
  14963. * **Description**:\n
  14964. * For the `KMSDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
  14965. * element of Rs2 and multiplies the top 32-bit element of Rs1 with the top 32-bit element of Rs2.
  14966. * For the `KMSXDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
  14967. * element of Rs2 and multiplies the top 32-bit element of Rs1 with the bottom 32-bit element of Rs2.
  14968. * The two 64-bit multiplication results are then subtracted from the content of Rd. If the subtraction
  14969. * result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit
  14970. * is set to 1. The result after saturation is written to Rd. The 32-bit contents are treated as signed
  14971. * integers.
  14972. *
  14973. * **Operations**:\n
  14974. * ~~~
  14975. * res = Rd - (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // KMSDA32
  14976. * res = Rd - (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // KMSXDA32
  14977. * if (res > (2^63)-1) {
  14978. * res = (2^63)-1;
  14979. * OV = 1;
  14980. * } else if (res < -2^63) {
  14981. * res = -2^63;
  14982. * OV = 1;
  14983. * }
  14984. * Rd = res;
  14985. * ~~~
  14986. *
  14987. * \param [in] t long type of value stored in t
  14988. * \param [in] a unsigned long type of value stored in a
  14989. * \param [in] b unsigned long type of value stored in b
  14990. * \return value stored in long type
  14991. */
  14992. __STATIC_FORCEINLINE long __RV_KMSXDA32(long t, unsigned long a, unsigned long b)
  14993. {
  /* Crossed form: t ("+r", accumulator in / result out) becomes
   * rd - top*bottom - bottom*top of a (Rs1) and b (Rs2), saturated to Q63.
   * NOTE: the documented OV update on saturation is not expressed in the operand
   * list - presumably why the asm is volatile. */
  14994. __ASM volatile("kmsxda32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  14995. return t;
  14996. }
  14997. /* ===== Inline Function End for 4.15.2. KMSXDA32 ===== */
  14998. /* ===== Inline Function Start for 4.16. KSLL32 ===== */
  14999. /**
  15000. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
  15001. * \brief KSLL32 (SIMD 32-bit Saturating Shift Left Logical)
  15002. * \details
  15003. * **Type**: SIMD (RV64 Only)
  15004. *
  15005. * **Syntax**:\n
  15006. * ~~~
  15007. * KSLL32 Rd, Rs1, Rs2
  15008. * ~~~
  15009. *
  15010. * **Purpose**:\n
  15011. * Do 32-bit elements logical left shift operations with saturation simultaneously. The shift
  15012. * amount is a variable from a GPR.
  15013. *
  15014. * **Description**:\n
  15015. * The 32-bit data elements in Rs1 are left-shifted logically. The shifted out bits are filled
  15016. * with zero and the shift amount is specified by the low-order 5-bits of the value in the Rs2 register.
  15017. * Any shifted value greater than 2^31-1 is saturated to 2^31-1. Any shifted value smaller than -2^31 is
  15018. * saturated to -2^31. And the saturated results are written to Rd. If any saturation is performed, set OV
  15019. * bit to 1.
  15020. *
  15021. * **Operations**:\n
  15022. * ~~~
  15023. * sa = Rs2[4:0];
  15024. * if (sa != 0) {
  15025. * res[(31+sa):0] = Rs1.W[x] << sa;
  15026. * if (res > (2^31)-1) {
  15027. * res = 0x7fffffff; OV = 1;
  15028. * } else if (res < -2^31) {
  15029. * res = 0x80000000; OV = 1;
  15030. * }
  15031. * Rd.W[x] = res[31:0];
  15032. * } else {
  15033. * Rd = Rs1;
  15034. * }
  15035. * for RV64: x=1...0
  15036. * ~~~
  15037. *
  15038. * \param [in] a unsigned long type of value stored in a
  15039. * \param [in] b unsigned int type of value stored in b
  15040. * \return value stored in unsigned long type
  15041. */
  15042. __STATIC_FORCEINLINE unsigned long __RV_KSLL32(unsigned long a, unsigned int b)
  15043. {
  /* Receives Rd (both shifted 32-bit lanes); write-only "=r" output. */
  15044. unsigned long result;
  /* a is Rs1 (the packed 32-bit elements), b is Rs2. Per the description only
   * Rs2[4:0] is used as the shift amount, so b is passed through unmasked.
   * NOTE: a saturating lane also sets OV, which is not among the asm operands -
   * presumably why the asm is volatile. */
  15045. __ASM volatile("ksll32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  15046. return result;
  15047. }
  15048. /* ===== Inline Function End for 4.16. KSLL32 ===== */
  15049. /* ===== Inline Function Start for 4.17. KSLLI32 ===== */
  15050. /**
  15051. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
  15052. * \brief KSLLI32 (SIMD 32-bit Saturating Shift Left Logical Immediate)
  15053. * \details
  15054. * **Type**: SIMD (RV64 Only)
  15055. *
  15056. * **Syntax**:\n
  15057. * ~~~
  15058. * KSLLI32 Rd, Rs1, imm5u
  15059. * ~~~
  15060. *
  15061. * **Purpose**:\n
  15062. * Do 32-bit elements logical left shift operations with saturation simultaneously. The shift
  15063. * amount is an immediate value.
  15064. *
  15065. * **Description**:\n
  15066. * The 32-bit data elements in Rs1 are left-shifted logically. The shifted out bits are filled
  15067. * with zero and the shift amount is specified by the imm5u constant. Any shifted value greater than
  15068. * 2^31-1 is saturated to 2^31-1. Any shifted value smaller than -2^31 is saturated to -2^31. And the saturated
  15069. * results are written to Rd. If any saturation is performed, set OV bit to 1.
  15070. *
  15071. * **Operations**:\n
  15072. * ~~~
  15073. * sa = imm5u[4:0];
  15074. * if (sa != 0) {
  15075. * res[(31+sa):0] = Rs1.W[x] << sa;
  15076. * if (res > (2^31)-1) {
  15077. * res = 0x7fffffff; OV = 1;
  15078. * } else if (res < -2^31) {
  15079. * res = 0x80000000; OV = 1;
  15080. * }
  15081. * Rd.W[x] = res[31:0];
  15082. * } else {
  15083. * Rd = Rs1;
  15084. * }
  15085. * for RV64: x=1...0
  15086. * ~~~
  15087. *
  15088. * \param [in] a unsigned long type of value stored in a
  15089. * \param [in] b unsigned int type of value stored in b
  15090. * \return value stored in unsigned long type
  15091. */
  /* Implemented as a macro because the shift amount `b` is bound with the "K"
   * constraint and is emitted as the imm5u field of the instruction, so it has to
   * be a compile-time constant that fits that field.
   * Hygiene: `a` is evaluated into its temporary BEFORE the result slot is
   * declared, and both temporaries use a __kslli32_ prefix. With the previous
   * local name `result` declared first, a call such as __RV_KSLLI32(result, 3)
   * read the macro's own uninitialised local instead of the caller's variable.
   * The value of the statement expression is the Rd result (unsigned long). */
  15092. #define __RV_KSLLI32(a, b) \
  15093. ({ \
  15094. unsigned long __kslli32_a = (unsigned long)(a); \
  15095. unsigned long __kslli32_rd; \
  15096. __ASM volatile("kslli32 %0, %1, %2" : "=r"(__kslli32_rd) : "r"(__kslli32_a), "K"(b)); \
  15097. __kslli32_rd; \
  15098. })
  15099. /* ===== Inline Function End for 4.17. KSLLI32 ===== */
  15100. /* ===== Inline Function Start for 4.18.1. KSLRA32 ===== */
  15101. /**
  15102. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
  15103. * \brief KSLRA32 (SIMD 32-bit Shift Left Logical with Saturation or Shift Right Arithmetic)
  15104. * \details
  15105. * **Type**: SIMD (RV64 Only)
  15106. *
  15107. * **Syntax**:\n
  15108. * ~~~
  15109. * KSLRA32 Rd, Rs1, Rs2
  15110. * KSLRA32.u Rd, Rs1, Rs2
  15111. * ~~~
  15112. *
  15113. * **Purpose**:\n
  15114. * Do 32-bit elements logical left (positive) or arithmetic right (negative) shift operation with
  15115. * Q31 saturation for the left shift. The `.u` form performs additional rounding up operations for the
  15116. * right shift.
  15117. *
  15118. * **Description**:\n
  15119. * The 32-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
  15120. * based on the value of Rs2[5:0]. Rs2[5:0] is in the signed range of [-2^5, 2^5-1]. A positive Rs2[5:0] means
  15121. * logical left shift and a negative Rs2[5:0] means arithmetic right shift. The shift amount is the
  15122. * absolute value of Rs2[5:0]. However, the behavior of `Rs2[5:0]==-2^5 (0x20)` is defined to be
  15123. * equivalent to the behavior of `Rs2[5:0]==-(2^5-1) (0x21)`.
  15124. * The left-shifted results are saturated to the 32-bit signed integer range of [-2^31, 2^31-1]. For the `.u`
  15125. * form of the instruction, the right-shifted results are added a 1 to the most significant discarded bit
  15126. * position for rounding effect. After the shift, saturation, or rounding, the final results are written to
  15127. * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:6] will not affect
  15128. * this instruction.
  15129. *
  15130. * **Operations**:\n
  15131. * ~~~
  15132. * if (Rs2[5:0] < 0) {
  15133. * sa = -Rs2[5:0];
  15134. * sa = (sa == 32)? 31 : sa;
  15135. * if (`.u` form) {
  15136. * res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1;
  15137. * Rd.W[x] = res[31:0];
  15138. * } else {
  15139. * Rd.W[x] = SE32(Rs1.W[x][31:sa]);
  15140. * }
  15141. * } else {
  15142. * sa = Rs2[4:0];
  15143. * res[(31+sa):0] = Rs1.W[x] <<(logic) sa;
  15144. * if (res > (2^31)-1) {
  15145. * res[31:0] = 0x7fffffff; OV = 1;
  15146. * } else if (res < -2^31) {
  15147. * res[31:0] = 0x80000000; OV = 1;
  15148. * }
  15149. * Rd.W[x] = res[31:0];
  15150. * }
  15151. * for RV64: x=1...0
  15152. * ~~~
  15153. *
  15154. * \param [in] a unsigned long type of value stored in a
  15155. * \param [in] b int type of value stored in b
  15156. * \return value stored in unsigned long type
  15157. */
  15158. __STATIC_FORCEINLINE unsigned long __RV_KSLRA32(unsigned long a, int b)
  15159. {
  /* Receives Rd (both shifted 32-bit lanes); write-only "=r" output. */
  15160. unsigned long result;
  /* a is Rs1, b is Rs2. b is signed because, per the description, Rs2[5:0] is
   * interpreted as a signed amount: non-negative selects a saturating left shift,
   * negative an arithmetic right shift; higher bits of b are ignored.
   * NOTE: a saturating left shift also sets OV, which is not among the asm
   * operands - presumably why the asm is volatile. */
  15161. __ASM volatile("kslra32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  15162. return result;
  15163. }
  15164. /* ===== Inline Function End for 4.18.1. KSLRA32 ===== */
  15165. /* ===== Inline Function Start for 4.18.2. KSLRA32.u ===== */
  15166. /**
  15167. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
  15168. * \brief KSLRA32.u (SIMD 32-bit Shift Left Logical with Saturation or Rounding Shift Right Arithmetic)
  15169. * \details
  15170. * **Type**: SIMD (RV64 Only)
  15171. *
  15172. * **Syntax**:\n
  15173. * ~~~
  15174. * KSLRA32 Rd, Rs1, Rs2
  15175. * KSLRA32.u Rd, Rs1, Rs2
  15176. * ~~~
  15177. *
  15178. * **Purpose**:\n
  15179. * Do 32-bit elements logical left (positive) or arithmetic right (negative) shift operation with
  15180. * Q31 saturation for the left shift. The `.u` form performs additional rounding up operations for the
  15181. * right shift.
  15182. *
  15183. * **Description**:\n
  15184. * The 32-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
  15185. * based on the value of Rs2[5:0]. Rs2[5:0] is in the signed range of [-2^5, 2^5-1]. A positive Rs2[5:0] means
  15186. * logical left shift and a negative Rs2[5:0] means arithmetic right shift. The shift amount is the
  15187. * absolute value of Rs2[5:0]. However, the behavior of `Rs2[5:0]==-2^5 (0x20)` is defined to be
  15188. * equivalent to the behavior of `Rs2[5:0]==-(2^5-1) (0x21)`.
  15189. * The left-shifted results are saturated to the 32-bit signed integer range of [-2^31, 2^31-1]. For the `.u`
  15190. * form of the instruction, the right-shifted results are added a 1 to the most significant discarded bit
  15191. * position for rounding effect. After the shift, saturation, or rounding, the final results are written to
  15192. * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:6] will not affect
  15193. * this instruction.
  15194. *
  15195. * **Operations**:\n
  15196. * ~~~
  15197. * if (Rs2[5:0] < 0) {
  15198. * sa = -Rs2[5:0];
  15199. * sa = (sa == 32)? 31 : sa;
  15200. * if (`.u` form) {
  15201. * res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1;
  15202. * Rd.W[x] = res[31:0];
  15203. * } else {
  15204. * Rd.W[x] = SE32(Rs1.W[x][31:sa]);
  15205. * }
  15206. * } else {
  15207. * sa = Rs2[4:0];
  15208. * res[(31+sa):0] = Rs1.W[x] <<(logic) sa;
  15209. * if (res > (2^31)-1) {
  15210. * res[31:0] = 0x7fffffff; OV = 1;
  15211. * } else if (res < -2^31) {
  15212. * res[31:0] = 0x80000000; OV = 1;
  15213. * }
  15214. * Rd.W[x] = res[31:0];
  15215. * }
  15216. * for RV64: x=1...0
  15217. * ~~~
  15218. *
  15219. * \param [in] a unsigned long type of value stored in a
  15220. * \param [in] b int type of value stored in b
  15221. * \return value stored in unsigned long type
  15222. */
  15223. __STATIC_FORCEINLINE unsigned long __RV_KSLRA32_U(unsigned long a, int b)
  15224. {
  15225. unsigned long result;
  15226. __ASM volatile("kslra32.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  15227. return result;
  15228. }
  15229. /* ===== Inline Function End for 4.18.2. KSLRA32.u ===== */
  15230. /* ===== Inline Function Start for 4.19. KSTAS32 ===== */
  15231. /**
  15232. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  15233. * \brief KSTAS32 (SIMD 32-bit Signed Saturating Straight Addition & Subtraction)
  15234. * \details
  15235. * **Type**: SIMD (RV64 Only)
  15236. *
  15237. * **Syntax**:\n
  15238. * ~~~
  15239. * KSTAS32 Rd, Rs1, Rs2
  15240. * ~~~
  15241. *
  15242. * **Purpose**:\n
  15243. * Do 32-bit signed integer element saturating addition and 32-bit signed integer element
  15244. * saturating subtraction in a 64-bit chunk simultaneously. Operands are from corresponding 32-bit
  15245. * elements.
  15246. *
  15247. * **Description**:\n
  15248. * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit
  15249. * integer element in [63:32] of Rs2; at the same time, it subtracts the 32-bit integer element in [31:0] of
  15250. * Rs2 from the 32-bit integer element in [31:0] of Rs1. If any of the results are beyond the Q31 number
  15251. * range (-2^31 <= Q31 <= 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated
  15252. * results are written to [63:32] of Rd for addition and [31:0] of Rd for subtraction.
  15253. *
  15254. * **Operations**:\n
  15255. * ~~~
  15256. * res[1] = Rs1.W[1] + Rs2.W[1];
  15257. * res[0] = Rs1.W[0] - Rs2.W[0];
  15258. * if (res[x] > (2^31)-1) {
  15259. * res[x] = (2^31)-1;
  15260. * OV = 1;
  15261. * } else if (res[x] < -2^31) {
  15262. * res[x] = -2^31;
  15263. * OV = 1;
  15264. * }
  15265. * Rd.W[1] = res[1];
  15266. * Rd.W[0] = res[0];
  15267. * for RV64, x=1...0
  15268. * ~~~
  15269. *
  15270. * \param [in] a unsigned long type of value stored in a
  15271. * \param [in] b unsigned long type of value stored in b
  15272. * \return value stored in unsigned long type
  15273. */
  15274. __STATIC_FORCEINLINE unsigned long __RV_KSTAS32(unsigned long a, unsigned long b)
  15275. {
  15276. unsigned long result;
  15277. __ASM volatile("kstas32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  15278. return result;
  15279. }
  15280. /* ===== Inline Function End for 4.19. KSTAS32 ===== */
  15281. /* ===== Inline Function Start for 4.20. KSTSA32 ===== */
  15282. /**
  15283. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  15284. * \brief KSTSA32 (SIMD 32-bit Signed Saturating Straight Subtraction & Addition)
  15285. * \details
  15286. * **Type**: SIMD (RV64 Only)
  15287. *
  15288. * **Syntax**:\n
  15289. * ~~~
  15290. * KSTSA32 Rd, Rs1, Rs2
  15291. * ~~~
  15292. *
  15293. * **Purpose**:\n
  15294. * Do 32-bit signed integer element saturating subtraction and 32-bit signed integer element
  15295. * saturating addition in a 64-bit chunk simultaneously. Operands are from corresponding 32-bit
  15296. * elements.
  15297. * **Description**:\n
  15298. * This instruction subtracts the 32-bit integer element in [63:32] of Rs2 from the 32-bit integer
  15299. * element in [63:32] of Rs1; at the same time, it adds the 32-bit integer element in [31:0] of Rs1 with
  15300. * the 32-bit integer element in [31:0] of Rs2. If any of the results are beyond the Q31 number range (
  15301. * -2^31 <= Q31 <= 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated results are
  15302. * written to [63:32] of Rd for subtraction and [31:0] of Rd for addition.
  15303. *
  15304. * **Operations**:\n
  15305. * ~~~
  15306. * res[1] = Rs1.W[1] - Rs2.W[1];
  15307. * res[0] = Rs1.W[0] + Rs2.W[0];
  15308. * if (res[x] > (2^31)-1) {
  15309. * res[x] = (2^31)-1;
  15310. * OV = 1;
  15311. * } else if (res[x] < -2^31) {
  15312. * res[x] = -2^31;
  15313. * OV = 1;
  15314. * }
  15315. * Rd.W[1] = res[1];
  15316. * Rd.W[0] = res[0];
  15317. * for RV64, x=1...0
  15318. * ~~~
  15319. *
  15320. * \param [in] a unsigned long type of value stored in a
  15321. * \param [in] b unsigned long type of value stored in b
  15322. * \return value stored in unsigned long type
  15323. */
  15324. __STATIC_FORCEINLINE unsigned long __RV_KSTSA32(unsigned long a, unsigned long b)
  15325. {
  15326. unsigned long result;
  15327. __ASM volatile("kstsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  15328. return result;
  15329. }
  15330. /* ===== Inline Function End for 4.20. KSTSA32 ===== */
  15331. /* ===== Inline Function Start for 4.21. KSUB32 ===== */
  15332. /**
  15333. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  15334. * \brief KSUB32 (SIMD 32-bit Signed Saturating Subtraction)
  15335. * \details
  15336. * **Type**: SIMD (RV64 Only)
  15337. *
  15338. * **Syntax**:\n
  15339. * ~~~
  15340. * KSUB32 Rd, Rs1, Rs2
  15341. * ~~~
  15342. *
  15343. * **Purpose**:\n
  15344. * Do 32-bit signed integer elements saturating subtractions simultaneously.
  15345. *
  15346. * **Description**:\n
  15347. * This instruction subtracts the 32-bit signed integer elements in Rs2 from the 32-bit
  15348. * signed integer elements in Rs1. If any of the results are beyond the Q31 number range (-2^31 <= Q31 <=
  15349. * 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to
  15350. * Rd.
  15351. *
  15352. * **Operations**:\n
  15353. * ~~~
  15354. * res[x] = Rs1.W[x] - Rs2.W[x];
  15355. * if (res[x] > (2^31)-1) {
  15356. * res[x] = (2^31)-1;
  15357. * OV = 1;
  15358. * } else if (res[x] < -2^31) {
  15359. * res[x] = -2^31;
  15360. * OV = 1;
  15361. * }
  15362. * Rd.W[x] = res[x];
  15363. * for RV64: x=1...0
  15364. * ~~~
  15365. *
  15366. * \param [in] a unsigned long type of value stored in a
  15367. * \param [in] b unsigned long type of value stored in b
  15368. * \return value stored in unsigned long type
  15369. */
  15370. __STATIC_FORCEINLINE unsigned long __RV_KSUB32(unsigned long a, unsigned long b)
  15371. {
  15372. unsigned long result;
  15373. __ASM volatile("ksub32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  15374. return result;
  15375. }
  15376. /* ===== Inline Function End for 4.21. KSUB32 ===== */
  15377. /* ===== Inline Function Start for 4.22.1. PKBB32 ===== */
  15378. /**
  15379. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PACK
  15380. * \brief PKBB32 (Pack Two 32-bit Data from Both Bottom Half)
  15381. * \details
  15382. * **Type**: DSP (RV64 Only)
  15383. *
  15384. * **Syntax**:\n
  15385. * ~~~
  15386. * PKBB32 Rd, Rs1, Rs2
  15387. * PKBT32 Rd, Rs1, Rs2
  15388. * PKTT32 Rd, Rs1, Rs2
  15389. * PKTB32 Rd, Rs1, Rs2
  15390. * ~~~
  15391. *
  15392. * **Purpose**:\n
  15393. * Pack 32-bit data from 64-bit chunks in two registers.
  15394. * * PKBB32: bottom.bottom
  15395. * * PKBT32: bottom.top
  15396. * * PKTT32: top.top
  15397. * * PKTB32: top.bottom
  15398. *
  15399. * **Description**:\n
  15400. * (PKBB32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
  15401. * (PKBT32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
  15402. * (PKTT32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
  15403. * (PKTB32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
  15404. *
  15405. * **Operations**:\n
  15406. * ~~~
  15407. * Rd = CONCAT(Rs1.W[_*0*_], Rs2.W[_*0*_]); // PKBB32
  15408. * Rd = CONCAT(Rs1.W[_*0*_], Rs2.W[_*1*_]); // PKBT32
  15409. * Rd = CONCAT(Rs1.W[_*1*_], Rs2.W[_*1*_]); // PKTT32
  15410. * Rd = CONCAT(Rs1.W[_*1*_], Rs2.W[_*0*_]); // PKTB32
  15411. * ~~~
  15412. *
  15413. * \param [in] a unsigned long type of value stored in a
  15414. * \param [in] b unsigned long type of value stored in b
  15415. * \return value stored in unsigned long type
  15416. */
  15417. __STATIC_FORCEINLINE unsigned long __RV_PKBB32(unsigned long a, unsigned long b)
  15418. {
  15419. unsigned long result;
  15420. __ASM volatile("pkbb32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  15421. return result;
  15422. }
  15423. /* ===== Inline Function End for 4.22.1. PKBB32 ===== */
  15424. /* ===== Inline Function Start for 4.22.2. PKBT32 ===== */
  15425. /**
  15426. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PACK
  15427. * \brief PKBT32 (Pack Two 32-bit Data from Bottom and Top Half)
  15428. * \details
  15429. * **Type**: DSP (RV64 Only)
  15430. *
  15431. * **Syntax**:\n
  15432. * ~~~
  15433. * PKBB32 Rd, Rs1, Rs2
  15434. * PKBT32 Rd, Rs1, Rs2
  15435. * PKTT32 Rd, Rs1, Rs2
  15436. * PKTB32 Rd, Rs1, Rs2
  15437. * ~~~
  15438. *
  15439. * **Purpose**:\n
  15440. * Pack 32-bit data from 64-bit chunks in two registers.
  15441. * * PKBB32: bottom.bottom
  15442. * * PKBT32: bottom.top
  15443. * * PKTT32: top.top
  15444. * * PKTB32: top.bottom
  15445. *
  15446. * **Description**:\n
  15447. * (PKBB32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
  15448. * (PKBT32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
  15449. * (PKTT32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
  15450. * (PKTB32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
  15451. *
  15452. * **Operations**:\n
  15453. * ~~~
  15454. * Rd = CONCAT(Rs1.W[_*0*_], Rs2.W[_*0*_]); // PKBB32
  15455. * Rd = CONCAT(Rs1.W[_*0*_], Rs2.W[_*1*_]); // PKBT32
  15456. * Rd = CONCAT(Rs1.W[_*1*_], Rs2.W[_*1*_]); // PKTT32
  15457. * Rd = CONCAT(Rs1.W[_*1*_], Rs2.W[_*0*_]); // PKTB32
  15458. * ~~~
  15459. *
  15460. * \param [in] a unsigned long type of value stored in a
  15461. * \param [in] b unsigned long type of value stored in b
  15462. * \return value stored in unsigned long type
  15463. */
  15464. __STATIC_FORCEINLINE unsigned long __RV_PKBT32(unsigned long a, unsigned long b)
  15465. {
  15466. unsigned long result;
  15467. __ASM volatile("pkbt32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  15468. return result;
  15469. }
  15470. /* ===== Inline Function End for 4.22.2. PKBT32 ===== */
  15471. /* ===== Inline Function Start for 4.22.3. PKTT32 ===== */
  15472. /**
  15473. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PACK
  15474. * \brief PKTT32 (Pack Two 32-bit Data from Both Top Half)
  15475. * \details
  15476. * **Type**: DSP (RV64 Only)
  15477. *
  15478. * **Syntax**:\n
  15479. * ~~~
  15480. * PKBB32 Rd, Rs1, Rs2
  15481. * PKBT32 Rd, Rs1, Rs2
  15482. * PKTT32 Rd, Rs1, Rs2
  15483. * PKTB32 Rd, Rs1, Rs2
  15484. * ~~~
  15485. *
  15486. * **Purpose**:\n
  15487. * Pack 32-bit data from 64-bit chunks in two registers.
  15488. * * PKBB32: bottom.bottom
  15489. * * PKBT32: bottom.top
  15490. * * PKTT32: top.top
  15491. * * PKTB32: top.bottom
  15492. *
  15493. * **Description**:\n
  15494. * (PKBB32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
  15495. * (PKBT32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
  15496. * (PKTT32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
  15497. * (PKTB32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
  15498. *
  15499. * **Operations**:\n
  15500. * ~~~
  15501. * Rd = CONCAT(Rs1.W[_*0*_], Rs2.W[_*0*_]); // PKBB32
  15502. * Rd = CONCAT(Rs1.W[_*0*_], Rs2.W[_*1*_]); // PKBT32
  15503. * Rd = CONCAT(Rs1.W[_*1*_], Rs2.W[_*1*_]); // PKTT32
  15504. * Rd = CONCAT(Rs1.W[_*1*_], Rs2.W[_*0*_]); // PKTB32
  15505. * ~~~
  15506. *
  15507. * \param [in] a unsigned long type of value stored in a
  15508. * \param [in] b unsigned long type of value stored in b
  15509. * \return value stored in unsigned long type
  15510. */
  15511. __STATIC_FORCEINLINE unsigned long __RV_PKTT32(unsigned long a, unsigned long b)
  15512. {
  15513. unsigned long result;
  15514. __ASM volatile("pktt32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  15515. return result;
  15516. }
  15517. /* ===== Inline Function End for 4.22.3. PKTT32 ===== */
  15518. /* ===== Inline Function Start for 4.22.4. PKTB32 ===== */
  15519. /**
  15520. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PACK
  15521. * \brief PKTB32 (Pack Two 32-bit Data from Top and Bottom Half)
  15522. * \details
  15523. * **Type**: DSP (RV64 Only)
  15524. *
  15525. * **Syntax**:\n
  15526. * ~~~
  15527. * PKBB32 Rd, Rs1, Rs2
  15528. * PKBT32 Rd, Rs1, Rs2
  15529. * PKTT32 Rd, Rs1, Rs2
  15530. * PKTB32 Rd, Rs1, Rs2
  15531. * ~~~
  15532. *
  15533. * **Purpose**:\n
  15534. * Pack 32-bit data from 64-bit chunks in two registers.
  15535. * * PKBB32: bottom.bottom
  15536. * * PKBT32: bottom.top
  15537. * * PKTT32: top.top
  15538. * * PKTB32: top.bottom
  15539. *
  15540. * **Description**:\n
  15541. * (PKBB32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
  15542. * (PKBT32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
  15543. * (PKTT32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
  15544. * (PKTB32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
  15545. *
  15546. * **Operations**:\n
  15547. * ~~~
  15548. * Rd = CONCAT(Rs1.W[_*0*_], Rs2.W[_*0*_]); // PKBB32
  15549. * Rd = CONCAT(Rs1.W[_*0*_], Rs2.W[_*1*_]); // PKBT32
  15550. * Rd = CONCAT(Rs1.W[_*1*_], Rs2.W[_*1*_]); // PKTT32
  15551. * Rd = CONCAT(Rs1.W[_*1*_], Rs2.W[_*0*_]); // PKTB32
  15552. * ~~~
  15553. *
  15554. * \param [in] a unsigned long type of value stored in a
  15555. * \param [in] b unsigned long type of value stored in b
  15556. * \return value stored in unsigned long type
  15557. */
  15558. __STATIC_FORCEINLINE unsigned long __RV_PKTB32(unsigned long a, unsigned long b)
  15559. {
  15560. unsigned long result;
  15561. __ASM volatile("pktb32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  15562. return result;
  15563. }
  15564. /* ===== Inline Function End for 4.22.4. PKTB32 ===== */
  15565. /* ===== Inline Function Start for 4.23. RADD32 ===== */
  15566. /**
  15567. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  15568. * \brief RADD32 (SIMD 32-bit Signed Halving Addition)
  15569. * \details
  15570. * **Type**: SIMD (RV64 Only)
  15571. *
  15572. * **Syntax**:\n
  15573. * ~~~
  15574. * RADD32 Rd, Rs1, Rs2
  15575. * ~~~
  15576. *
  15577. * **Purpose**:\n
  15578. * Do 32-bit signed integer element additions simultaneously. The results are halved to avoid
  15579. * overflow or saturation.
  15580. *
  15581. * **Description**:\n
  15582. * This instruction adds the 32-bit signed integer elements in Rs1 with the 32-bit signed
  15583. * integer elements in Rs2. The results are first arithmetically right-shifted by 1 bit and then written to
  15584. * Rd.
  15585. *
  15586. * **Examples**:\n
  15587. * ~~~
  15588. * * Rs1 = 0x7FFFFFFF, Rs2 = 0x7FFFFFFF Rd = 0x7FFFFFFF
  15589. * * Rs1 = 0x80000000, Rs2 = 0x80000000 Rd = 0x80000000
  15590. * * Rs1 = 0x40000000, Rs2 = 0x80000000 Rd = 0xE0000000
  15591. * ~~~
  15592. *
  15593. * **Operations**:\n
  15594. * ~~~
  15595. * Rd.W[x] = (Rs1.W[x] + Rs2.W[x]) s>> 1;
  15596. * for RV64: x=1...0
  15597. * ~~~
  15598. *
  15599. * \param [in] a unsigned long type of value stored in a
  15600. * \param [in] b unsigned long type of value stored in b
  15601. * \return value stored in unsigned long type
  15602. */
  15603. __STATIC_FORCEINLINE unsigned long __RV_RADD32(unsigned long a, unsigned long b)
  15604. {
  15605. unsigned long result;
  15606. __ASM volatile("radd32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  15607. return result;
  15608. }
  15609. /* ===== Inline Function End for 4.23. RADD32 ===== */
  15610. /* ===== Inline Function Start for 4.24. RCRAS32 ===== */
  15611. /**
  15612. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  15613. * \brief RCRAS32 (SIMD 32-bit Signed Halving Cross Addition & Subtraction)
  15614. * \details
  15615. * **Type**: SIMD (RV64 Only)
  15616. *
  15617. * **Syntax**:\n
  15618. * ~~~
  15619. * RCRAS32 Rd, Rs1, Rs2
  15620. * ~~~
  15621. *
  15622. * **Purpose**:\n
  15623. * Do 32-bit signed integer element addition and 32-bit signed integer element subtraction in
  15624. * a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements. The results are halved to
  15625. * avoid overflow or saturation.
  15626. *
  15627. * **Description**:\n
  15628. * This instruction adds the 32-bit signed integer element in [63:32] of Rs1 with the 32-bit
  15629. * signed integer element in [31:0] of Rs2, and subtracts the 32-bit signed integer element in [63:32] of
  15630. * Rs2 from the 32-bit signed integer element in [31:0] of Rs1. The element results are first
  15631. * arithmetically right-shifted by 1 bit and then written to [63:32] of Rd for addition and [31:0] of Rd
  15632. * for subtraction.
  15633. *
  15634. * **Examples**:\n
  15635. * ~~~
  15636. * Please see `RADD32` and `RSUB32` instructions.
  15637. * ~~~
  15638. *
  15639. * **Operations**:\n
  15640. * ~~~
  15641. * Rd.W[1] = (Rs1.W[1] + Rs2.W[0]) s>> 1;
  15642. * Rd.W[0] = (Rs1.W[0] - Rs2.W[1]) s>> 1;
  15643. * ~~~
  15644. *
  15645. * \param [in] a unsigned long type of value stored in a
  15646. * \param [in] b unsigned long type of value stored in b
  15647. * \return value stored in unsigned long type
  15648. */
  15649. __STATIC_FORCEINLINE unsigned long __RV_RCRAS32(unsigned long a, unsigned long b)
  15650. {
  15651. unsigned long result;
  15652. __ASM volatile("rcras32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  15653. return result;
  15654. }
  15655. /* ===== Inline Function End for 4.24. RCRAS32 ===== */
  15656. /* ===== Inline Function Start for 4.25. RCRSA32 ===== */
  15657. /**
  15658. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  15659. * \brief RCRSA32 (SIMD 32-bit Signed Halving Cross Subtraction & Addition)
  15660. * \details
  15661. * **Type**: SIMD (RV64 Only)
  15662. *
  15663. * **Syntax**:\n
  15664. * ~~~
  15665. * RCRSA32 Rd, Rs1, Rs2
  15666. * ~~~
  15667. *
  15668. * **Purpose**:\n
  15669. * Do 32-bit signed integer element subtraction and 32-bit signed integer element addition in
  15670. * a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements. The results are halved to
  15671. * avoid overflow or saturation.
  15672. *
  15673. * **Description**:\n
  15674. * This instruction subtracts the 32-bit signed integer element in [31:0] of Rs2 from the
  15675. * 32-bit signed integer element in [63:32] of Rs1, and adds the 32-bit signed integer element in [31:0]
  15676. * of Rs1 with the 32-bit signed integer element in [63:32] of Rs2. The two results are first
  15677. * arithmetically right-shifted by 1 bit and then written to [63:32] of Rd for subtraction and [31:0] of
  15678. * Rd for addition.
  15679. *
  15680. * **Examples**:\n
  15681. * ~~~
  15682. * Please see `RADD32` and `RSUB32` instructions.
  15683. * ~~~
  15684. *
  15685. * **Operations**:\n
  15686. * ~~~
  15687. * Rd.W[1] = (Rs1.W[1] - Rs2.W[0]) s>> 1;
  15688. * Rd.W[0] = (Rs1.W[0] + Rs2.W[1]) s>> 1;
  15689. * ~~~
  15690. *
  15691. * \param [in] a unsigned long type of value stored in a
  15692. * \param [in] b unsigned long type of value stored in b
  15693. * \return value stored in unsigned long type
  15694. */
  15695. __STATIC_FORCEINLINE unsigned long __RV_RCRSA32(unsigned long a, unsigned long b)
  15696. {
  15697. unsigned long result;
  15698. __ASM volatile("rcrsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  15699. return result;
  15700. }
  15701. /* ===== Inline Function End for 4.25. RCRSA32 ===== */
  15702. /* ===== Inline Function Start for 4.26. RSTAS32 ===== */
  15703. /**
  15704. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  15705. * \brief RSTAS32 (SIMD 32-bit Signed Halving Straight Addition & Subtraction)
  15706. * \details
  15707. * **Type**: SIMD (RV64 Only)
  15708. *
  15709. * **Syntax**:\n
  15710. * ~~~
  15711. * RSTAS32 Rd, Rs1, Rs2
  15712. * ~~~
  15713. *
  15714. * **Purpose**:\n
  15715. * Do 32-bit signed integer element addition and 32-bit signed integer element subtraction in
  15716. * a 64-bit chunk simultaneously. Operands are from corresponding 32-bit elements. The results are
  15717. * halved to avoid overflow or saturation.
  15718. *
  15719. * **Description**:\n
  15720. * This instruction adds the 32-bit signed integer element in [63:32] of Rs1 with the 32-bit
  15721. * signed integer element in [63:32] of Rs2, and subtracts the 32-bit signed integer element in [31:0] of
  15722. * Rs2 from the 32-bit signed integer element in [31:0] of Rs1. The element results are first
  15723. * arithmetically right-shifted by 1 bit and then written to [63:32] of Rd for addition and [31:0] of Rd
  15724. * for subtraction.
  15725. *
  15726. * **Examples**:\n
  15727. * ~~~
  15728. * Please see `RADD32` and `RSUB32` instructions.
  15729. * ~~~
  15730. *
  15731. * **Operations**:\n
  15732. * ~~~
  15733. * Rd.W[1] = (Rs1.W[1] + Rs2.W[1]) s>> 1;
  15734. * Rd.W[0] = (Rs1.W[0] - Rs2.W[0]) s>> 1;
  15735. * ~~~
  15736. *
  15737. * \param [in] a unsigned long type of value stored in a
  15738. * \param [in] b unsigned long type of value stored in b
  15739. * \return value stored in unsigned long type
  15740. */
  15741. __STATIC_FORCEINLINE unsigned long __RV_RSTAS32(unsigned long a, unsigned long b)
  15742. {
  15743. unsigned long result;
  15744. __ASM volatile("rstas32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  15745. return result;
  15746. }
  15747. /* ===== Inline Function End for 4.26. RSTAS32 ===== */
  15748. /* ===== Inline Function Start for 4.27. RSTSA32 ===== */
  15749. /**
  15750. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  15751. * \brief RSTSA32 (SIMD 32-bit Signed Halving Straight Subtraction & Addition)
  15752. * \details
  15753. * **Type**: SIMD (RV64 Only)
  15754. *
  15755. * **Syntax**:\n
  15756. * ~~~
  15757. * RSTSA32 Rd, Rs1, Rs2
  15758. * ~~~
  15759. *
  15760. * **Purpose**:\n
  15761. * Do 32-bit signed integer element subtraction and 32-bit signed integer element addition in
  15762. * a 64-bit chunk simultaneously. Operands are from corresponding 32-bit elements. The results are
  15763. * halved to avoid overflow or saturation.
  15764. *
  15765. * **Description**:\n
  15766. * This instruction subtracts the 32-bit signed integer element in [63:32] of Rs2 from the
  15767. * 32-bit signed integer element in [63:32] of Rs1, and adds the 32-bit signed integer element in [31:0]
  15768. * of Rs1 with the 32-bit signed integer element in [31:0] of Rs2. The two results are first arithmetically
  15769. * right-shifted by 1 bit and then written to [63:32] of Rd for subtraction and [31:0] of Rd for addition.
  15770. *
  15771. * **Examples**:\n
  15772. * ~~~
  15773. * Please see `RADD32` and `RSUB32` instructions.
  15774. * ~~~
  15775. *
  15776. * **Operations**:\n
  15777. * ~~~
  15778. * Rd.W[1] = (Rs1.W[1] - Rs2.W[1]) s>> 1;
  15779. * Rd.W[0] = (Rs1.W[0] + Rs2.W[0]) s>> 1;
  15780. * ~~~
  15781. *
  15782. * \param [in] a unsigned long type of value stored in a
  15783. * \param [in] b unsigned long type of value stored in b
  15784. * \return value stored in unsigned long type
  15785. */
  15786. __STATIC_FORCEINLINE unsigned long __RV_RSTSA32(unsigned long a, unsigned long b)
  15787. {
  15788. unsigned long result;
  15789. __ASM volatile("rstsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  15790. return result;
  15791. }
  15792. /* ===== Inline Function End for 4.27. RSTSA32 ===== */
  15793. /* ===== Inline Function Start for 4.28. RSUB32 ===== */
  15794. /**
  15795. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  15796. * \brief RSUB32 (SIMD 32-bit Signed Halving Subtraction)
  15797. * \details
  15798. * **Type**: SIMD (RV64 Only)
  15799. *
  15800. * **Syntax**:\n
  15801. * ~~~
  15802. * RSUB32 Rd, Rs1, Rs2
  15803. * ~~~
  15804. *
  15805. * **Purpose**:\n
  15806. * Do 32-bit signed integer element subtractions simultaneously. The results are halved to
  15807. * avoid overflow or saturation.
  15808. *
  15809. * **Description**:\n
  15810. * This instruction subtracts the 32-bit signed integer elements in Rs2 from the 32-bit
  15811. * signed integer elements in Rs1. The results are first arithmetically right-shifted by 1 bit and then
  15812. * written to Rd.
  15813. *
  15814. * **Examples**:\n
  15815. * ~~~
  15816. * * Rs1 = 0x7FFFFFFF, Rs2 = 0x80000000 Rd = 0x7FFFFFFF
  15817. * * Rs1 = 0x80000000, Rs2 = 0x7FFFFFFF Rd = 0x80000000
  15818. * * Rs1 = 0x80000000, Rs2 = 0x40000000 Rd = 0xA0000000
  15819. * ~~~
  15820. *
  15821. * **Operations**:\n
  15822. * ~~~
  15823. * Rd.W[x] = (Rs1.W[x] - Rs2.W[x]) s>> 1;
  15824. * for RV64: x=1...0
  15825. * ~~~
  15826. *
  15827. * \param [in] a unsigned long type of value stored in a
  15828. * \param [in] b unsigned long type of value stored in b
  15829. * \return value stored in unsigned long type
  15830. */
  15831. __STATIC_FORCEINLINE unsigned long __RV_RSUB32(unsigned long a, unsigned long b)
  15832. {
  15833. unsigned long result;
  15834. __ASM volatile("rsub32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  15835. return result;
  15836. }
  15837. /* ===== Inline Function End for 4.28. RSUB32 ===== */
  15838. /* ===== Inline Function Start for 4.29. SLL32 ===== */
  15839. /**
  15840. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
  15841. * \brief SLL32 (SIMD 32-bit Shift Left Logical)
  15842. * \details
  15843. * **Type**: SIMD (RV64 Only)
  15844. *
  15845. * **Syntax**:\n
  15846. * ~~~
  15847. * SLL32 Rd, Rs1, Rs2
  15848. * ~~~
  15849. *
  15850. * **Purpose**:\n
  15851. * Do 32-bit elements logical left shift operations simultaneously. The shift amount is a
  15852. * variable from a GPR.
  15853. *
  15854. * **Description**:\n
  15855. * The 32-bit elements in Rs1 are left-shifted logically. And the results are written to Rd.
  15856. * The vacated low-order bit positions are filled with zero and the shift amount is specified by the low-order 5 bits of
  15857. * the value in the Rs2 register.
  15858. *
  15859. * **Operations**:\n
  15860. * ~~~
  15861. * sa = Rs2[4:0];
  15862. * Rd.W[x] = Rs1.W[x] << sa;
  15863. * for RV64: x=1...0
  15864. * ~~~
  15865. *
  15866. * \param [in] a unsigned long type of value stored in a
  15867. * \param [in] b unsigned int type of value stored in b
  15868. * \return value stored in unsigned long type
  15869. */
  15870. __STATIC_FORCEINLINE unsigned long __RV_SLL32(unsigned long a, unsigned int b)
  15871. {
  15872. unsigned long result;
  15873. __ASM volatile("sll32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  15874. return result;
  15875. }
  15876. /* ===== Inline Function End for 4.29. SLL32 ===== */
  15877. /* ===== Inline Function Start for 4.30. SLLI32 ===== */
  15878. /**
  15879. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
  15880. * \brief SLLI32 (SIMD 32-bit Shift Left Logical Immediate)
  15881. * \details
  15882. * **Type**: SIMD (RV64 Only)
  15883. *
  15884. * **Syntax**:\n
  15885. * ~~~
  15886. * SLLI32 Rd, Rs1, imm5u[4:0]
  15887. * ~~~
  15888. *
  15889. * **Purpose**:\n
  15890. * Do 32-bit element logical left shift operations simultaneously. The shift amount is an
  15891. * immediate value.
  15892. *
  15893. * **Description**:\n
  15894. * The 32-bit elements in Rs1 are left-shifted logically. The vacated low-order bit positions are filled with
  15895. * zero and the shift amount is specified by the imm5u[4:0] constant. And the results are written to Rd.
  15896. *
  15897. * **Operations**:\n
  15898. * ~~~
  15899. * sa = imm5u[4:0];
  15900. * Rd.W[x] = Rs1.W[x] << sa;
  15901. * for RV64: x=1...0
  15902. * ~~~
  15903. *
  15904. * \param [in] a unsigned long type of value stored in a
  15905. * \param [in] b unsigned int type of value stored in b
  15906. * \return value stored in unsigned long type
  15907. */
  /*
   * Implementation notes for __RV_SLLI32:
   *  - (a) is evaluated exactly once, BEFORE the output temporary is declared,
   *    and both temporaries use macro-specific names. A caller expression such
   *    as __RV_SLLI32(result, 3) therefore reads the caller's `result` rather
   *    than an uninitialized local belonging to this macro.
   *  - (b) is passed through the "K" asm constraint, so it has to be a
   *    compile-time constant that the constraint accepts (the instruction
   *    syntax above takes a 5-bit unsigned immediate, imm5u).
   *  - The macro is a GNU statement expression; its value is the Rd result.
   */
  #define __RV_SLLI32(a, b)    \
      ({    \
          unsigned long __rv_slli32_a = (unsigned long)(a);    \
          unsigned long __rv_slli32_rd;    \
          __ASM volatile("slli32 %0, %1, %2" : "=r"(__rv_slli32_rd) : "r"(__rv_slli32_a), "K"(b));    \
          __rv_slli32_rd;    \
      })
  15915. /* ===== Inline Function End for 4.30. SLLI32 ===== */
  15916. /* ===== Inline Function Start for 4.31. SMAX32 ===== */
  15917. /**
  15918. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC
  15919. * \brief SMAX32 (SIMD 32-bit Signed Maximum)
  15920. * \details
  15921. * **Type**: SIMD (RV64 Only)
  15922. *
  15923. * **Syntax**:\n
  15924. * ~~~
  15925. * SMAX32 Rd, Rs1, Rs2
  15926. * ~~~
  15927. *
  15928. * **Purpose**:\n
  15929. * Do 32-bit signed integer elements finding maximum operations simultaneously.
  15930. *
  15931. * **Description**:\n
  15932. * This instruction compares the 32-bit signed integer elements in Rs1 with the 32-bit
  15933. * signed integer elements in Rs2 and, for each pair, selects the element that is greater. The
  15934. * selected results are written to Rd.
  15935. *
  15936. * **Operations**:\n
  15937. * ~~~
  15938. * Rd.W[x] = (Rs1.W[x] > Rs2.W[x])? Rs1.W[x] : Rs2.W[x];
  15939. * for RV64: x=1...0
  15940. * ~~~
  15941. *
  15942. * \param [in] a unsigned long type of value stored in a
  15943. * \param [in] b unsigned long type of value stored in b
  15944. * \return value stored in unsigned long type
  15945. */
  15946. __STATIC_FORCEINLINE unsigned long __RV_SMAX32(unsigned long a, unsigned long b)
  15947. {
  15948. unsigned long result;
  15949. __ASM volatile("smax32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  15950. return result;
  15951. }
  15952. /* ===== Inline Function End for 4.31. SMAX32 ===== */
  15953. /* ===== Inline Function Start for 4.32.1. SMBB32 ===== */
  15954. /**
  15955. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT
  15956. * \brief SMBB32 (Signed Multiply Bottom Word & Bottom Word)
  15957. * \details
  15958. * **Type**: DSP (RV64 Only)
  15959. *
  15960. * **Syntax**:\n
  15961. * ~~~
  15962. * SMBB32 Rd, Rs1, Rs2
  15963. * SMBT32 Rd, Rs1, Rs2
  15964. * SMTT32 Rd, Rs1, Rs2
  15965. * ~~~
  15966. *
  15967. * **Purpose**:\n
  15968. * Multiply the signed 32-bit element of a register with the signed 32-bit element of another
  15969. * register and write the 64-bit result to a third register.
  15970. * * SMBB32: bottom*bottom
  15971. * * SMBT32: bottom*top
  15972. * * SMTT32: top*top
  15973. *
  15974. * **Description**:\n
  15975. * For the `SMBB32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
  15976. * element of Rs2. It is actually an alias of `MULSR64` instruction.
  15977. * For the `SMBT32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
  15978. * element of Rs2.
  15979. * For the `SMTT32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit element
  15980. * of Rs2.
  15981. * The 64-bit multiplication result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as
  15982. * signed integers.
  15983. *
  15984. * **Operations**:\n
  15985. * ~~~
  * res = Rs1.W[0] * Rs2.W[0]; // SMBB32
  * res = Rs1.W[0] * Rs2.W[1]; // SMBT32
  * res = Rs1.W[1] * Rs2.W[1]; // SMTT32
  * Rd = res;
  15988. * ~~~
  15989. *
  15990. * \param [in] a unsigned long type of value stored in a
  15991. * \param [in] b unsigned long type of value stored in b
  15992. * \return value stored in long type
  15993. */
  15994. __STATIC_FORCEINLINE long __RV_SMBB32(unsigned long a, unsigned long b)
  15995. {
  15996. long result;
  15997. __ASM volatile("smbb32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  15998. return result;
  15999. }
  16000. /* ===== Inline Function End for 4.32.1. SMBB32 ===== */
  16001. /* ===== Inline Function Start for 4.32.2. SMBT32 ===== */
  16002. /**
  16003. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT
  16004. * \brief SMBT32 (Signed Multiply Bottom Word & Top Word)
  16005. * \details
  16006. * **Type**: DSP (RV64 Only)
  16007. *
  16008. * **Syntax**:\n
  16009. * ~~~
  16010. * SMBB32 Rd, Rs1, Rs2
  16011. * SMBT32 Rd, Rs1, Rs2
  16012. * SMTT32 Rd, Rs1, Rs2
  16013. * ~~~
  16014. *
  16015. * **Purpose**:\n
  16016. * Multiply the signed 32-bit element of a register with the signed 32-bit element of another
  16017. * register and write the 64-bit result to a third register.
  16018. * * SMBB32: bottom*bottom
  16019. * * SMBT32: bottom*top
  16020. * * SMTT32: top*top
  16021. *
  16022. * **Description**:\n
  16023. * For the `SMBB32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
  16024. * element of Rs2. It is actually an alias of `MULSR64` instruction.
  16025. * For the `SMBT32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
  16026. * element of Rs2.
  16027. * For the `SMTT32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit element
  16028. * of Rs2.
  16029. * The 64-bit multiplication result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as
  16030. * signed integers.
  16031. *
  16032. * **Operations**:\n
  16033. * ~~~
  * res = Rs1.W[0] * Rs2.W[0]; // SMBB32
  * res = Rs1.W[0] * Rs2.W[1]; // SMBT32
  * res = Rs1.W[1] * Rs2.W[1]; // SMTT32
  * Rd = res;
  16036. * ~~~
  16037. *
  16038. * \param [in] a unsigned long type of value stored in a
  16039. * \param [in] b unsigned long type of value stored in b
  16040. * \return value stored in long type
  16041. */
  16042. __STATIC_FORCEINLINE long __RV_SMBT32(unsigned long a, unsigned long b)
  16043. {
  16044. long result;
  16045. __ASM volatile("smbt32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  16046. return result;
  16047. }
  16048. /* ===== Inline Function End for 4.32.2. SMBT32 ===== */
  16049. /* ===== Inline Function Start for 4.32.3. SMTT32 ===== */
  16050. /**
  16051. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT
  16052. * \brief SMTT32 (Signed Multiply Top Word & Top Word)
  16053. * \details
  16054. * **Type**: DSP (RV64 Only)
  16055. *
  16056. * **Syntax**:\n
  16057. * ~~~
  16058. * SMBB32 Rd, Rs1, Rs2
  16059. * SMBT32 Rd, Rs1, Rs2
  16060. * SMTT32 Rd, Rs1, Rs2
  16061. * ~~~
  16062. *
  16063. * **Purpose**:\n
  16064. * Multiply the signed 32-bit element of a register with the signed 32-bit element of another
  16065. * register and write the 64-bit result to a third register.
  16066. * * SMBB32: bottom*bottom
  16067. * * SMBT32: bottom*top
  16068. * * SMTT32: top*top
  16069. *
  16070. * **Description**:\n
  16071. * For the `SMBB32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
  16072. * element of Rs2. It is actually an alias of `MULSR64` instruction.
  16073. * For the `SMBT32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
  16074. * element of Rs2.
  16075. * For the `SMTT32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit element
  16076. * of Rs2.
  16077. * The 64-bit multiplication result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as
  16078. * signed integers.
  16079. *
  16080. * **Operations**:\n
  16081. * ~~~
  * res = Rs1.W[0] * Rs2.W[0]; // SMBB32
  * res = Rs1.W[0] * Rs2.W[1]; // SMBT32
  * res = Rs1.W[1] * Rs2.W[1]; // SMTT32
  * Rd = res;
  16084. * ~~~
  16085. *
  16086. * \param [in] a unsigned long type of value stored in a
  16087. * \param [in] b unsigned long type of value stored in b
  16088. * \return value stored in long type
  16089. */
  16090. __STATIC_FORCEINLINE long __RV_SMTT32(unsigned long a, unsigned long b)
  16091. {
  16092. long result;
  16093. __ASM volatile("smtt32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  16094. return result;
  16095. }
  16096. /* ===== Inline Function End for 4.32.3. SMTT32 ===== */
  16097. /* ===== Inline Function Start for 4.33.1. SMDS32 ===== */
  16098. /**
  16099. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
  16100. * \brief SMDS32 (Signed Multiply Two Words and Subtract)
  16101. * \details
  16102. * **Type**: DSP (RV64 Only)
  16103. *
  16104. * **Syntax**:\n
  16105. * ~~~
  16106. * SMDS32 Rd, Rs1, Rs2
  16107. * SMDRS32 Rd, Rs1, Rs2
  16108. * SMXDS32 Rd, Rs1, Rs2
  16109. * ~~~
  16110. *
  16111. * **Purpose**:\n
  * Do two signed 32-bit multiplications from the 32-bit elements of two registers; and then
  16113. * perform a subtraction operation between the two 64-bit results.
  16114. * * SMDS32: top*top - bottom*bottom
  16115. * * SMDRS32: bottom*bottom - top*top
  16116. * * SMXDS32: top*bottom - bottom*top
  16117. *
  16118. * **Description**:\n
  16119. * For the `SMDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
  16120. * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
  16121. * Rs1 with the top 32-bit element of Rs2.
  16122. * For the `SMDRS32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit
  16123. * element of Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit
  16124. * element of Rs1 with the bottom 32-bit element of Rs2.
  16125. * For the `SMXDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
  16126. * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
  16127. * Rs1 with the bottom 32-bit element of Rs2.
  16128. * The subtraction result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed
  16129. * integers.
  16130. *
  16131. * **Operations**:\n
  16132. * ~~~
  16133. * Rt = (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // SMDS32
  16134. * Rt = (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // SMDRS32
  16135. * Rt = (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // SMXDS32
  16136. * ~~~
  16137. *
  16138. * \param [in] a unsigned long type of value stored in a
  16139. * \param [in] b unsigned long type of value stored in b
  16140. * \return value stored in long type
  16141. */
  16142. __STATIC_FORCEINLINE long __RV_SMDS32(unsigned long a, unsigned long b)
  16143. {
  16144. long result;
  16145. __ASM volatile("smds32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  16146. return result;
  16147. }
  16148. /* ===== Inline Function End for 4.33.1. SMDS32 ===== */
  16149. /* ===== Inline Function Start for 4.33.2. SMDRS32 ===== */
  16150. /**
  16151. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
  16152. * \brief SMDRS32 (Signed Multiply Two Words and Reverse Subtract)
  16153. * \details
  16154. * **Type**: DSP (RV64 Only)
  16155. *
  16156. * **Syntax**:\n
  16157. * ~~~
  16158. * SMDS32 Rd, Rs1, Rs2
  16159. * SMDRS32 Rd, Rs1, Rs2
  16160. * SMXDS32 Rd, Rs1, Rs2
  16161. * ~~~
  16162. *
  16163. * **Purpose**:\n
  * Do two signed 32-bit multiplications from the 32-bit elements of two registers; and then
  16165. * perform a subtraction operation between the two 64-bit results.
  16166. * * SMDS32: top*top - bottom*bottom
  16167. * * SMDRS32: bottom*bottom - top*top
  16168. * * SMXDS32: top*bottom - bottom*top
  16169. *
  16170. * **Description**:\n
  16171. * For the `SMDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
  16172. * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
  16173. * Rs1 with the top 32-bit element of Rs2.
  16174. * For the `SMDRS32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit
  16175. * element of Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit
  16176. * element of Rs1 with the bottom 32-bit element of Rs2.
  16177. * For the `SMXDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
  16178. * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
  16179. * Rs1 with the bottom 32-bit element of Rs2.
  16180. * The subtraction result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed
  16181. * integers.
  16182. *
  16183. * **Operations**:\n
  16184. * ~~~
  16185. * Rt = (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // SMDS32
  16186. * Rt = (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // SMDRS32
  16187. * Rt = (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // SMXDS32
  16188. * ~~~
  16189. *
  16190. * \param [in] a unsigned long type of value stored in a
  16191. * \param [in] b unsigned long type of value stored in b
  16192. * \return value stored in long type
  16193. */
  16194. __STATIC_FORCEINLINE long __RV_SMDRS32(unsigned long a, unsigned long b)
  16195. {
  16196. long result;
  16197. __ASM volatile("smdrs32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  16198. return result;
  16199. }
  16200. /* ===== Inline Function End for 4.33.2. SMDRS32 ===== */
  16201. /* ===== Inline Function Start for 4.33.3. SMXDS32 ===== */
  16202. /**
  16203. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
  16204. * \brief SMXDS32 (Signed Crossed Multiply Two Words and Subtract)
  16205. * \details
  16206. * **Type**: DSP (RV64 Only)
  16207. *
  16208. * **Syntax**:\n
  16209. * ~~~
  16210. * SMDS32 Rd, Rs1, Rs2
  16211. * SMDRS32 Rd, Rs1, Rs2
  16212. * SMXDS32 Rd, Rs1, Rs2
  16213. * ~~~
  16214. *
  16215. * **Purpose**:\n
  * Do two signed 32-bit multiplications from the 32-bit elements of two registers; and then
  16217. * perform a subtraction operation between the two 64-bit results.
  16218. * * SMDS32: top*top - bottom*bottom
  16219. * * SMDRS32: bottom*bottom - top*top
  16220. * * SMXDS32: top*bottom - bottom*top
  16221. *
  16222. * **Description**:\n
  16223. * For the `SMDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
  16224. * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
  16225. * Rs1 with the top 32-bit element of Rs2.
  16226. * For the `SMDRS32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit
  16227. * element of Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit
  16228. * element of Rs1 with the bottom 32-bit element of Rs2.
  16229. * For the `SMXDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
  16230. * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
  16231. * Rs1 with the bottom 32-bit element of Rs2.
  16232. * The subtraction result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed
  16233. * integers.
  16234. *
  16235. * **Operations**:\n
  16236. * ~~~
  16237. * Rt = (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // SMDS32
  16238. * Rt = (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // SMDRS32
  16239. * Rt = (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // SMXDS32
  16240. * ~~~
  16241. *
  16242. * \param [in] a unsigned long type of value stored in a
  16243. * \param [in] b unsigned long type of value stored in b
  16244. * \return value stored in long type
  16245. */
  16246. __STATIC_FORCEINLINE long __RV_SMXDS32(unsigned long a, unsigned long b)
  16247. {
  16248. long result;
  16249. __ASM volatile("smxds32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  16250. return result;
  16251. }
  16252. /* ===== Inline Function End for 4.33.3. SMXDS32 ===== */
  16253. /* ===== Inline Function Start for 4.34. SMIN32 ===== */
  16254. /**
  16255. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC
  16256. * \brief SMIN32 (SIMD 32-bit Signed Minimum)
  16257. * \details
  16258. * **Type**: SIMD (RV64 Only)
  16259. *
  16260. * **Syntax**:\n
  16261. * ~~~
  16262. * SMIN32 Rd, Rs1, Rs2
  16263. * ~~~
  16264. *
  16265. * **Purpose**:\n
  16266. * Do 32-bit signed integer elements finding minimum operations simultaneously.
  16267. *
  16268. * **Description**:\n
  * This instruction compares the 32-bit signed integer elements in Rs1 with the 32-bit
  * signed integer elements in Rs2 and, for each pair, selects the lesser of the two. The selected
  * results are written to Rd.
  16272. *
  16273. * **Operations**:\n
  16274. * ~~~
  16275. * Rd.W[x] = (Rs1.W[x] < Rs2.W[x])? Rs1.W[x] : Rs2.W[x];
  16276. * for RV64: x=1...0
  16277. * ~~~
  16278. *
  16279. * \param [in] a unsigned long type of value stored in a
  16280. * \param [in] b unsigned long type of value stored in b
  16281. * \return value stored in unsigned long type
  16282. */
  16283. __STATIC_FORCEINLINE unsigned long __RV_SMIN32(unsigned long a, unsigned long b)
  16284. {
  16285. unsigned long result;
  16286. __ASM volatile("smin32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  16287. return result;
  16288. }
  16289. /* ===== Inline Function End for 4.34. SMIN32 ===== */
  16290. /* ===== Inline Function Start for 4.35.1. SRA32 ===== */
  16291. /**
  16292. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
  16293. * \brief SRA32 (SIMD 32-bit Shift Right Arithmetic)
  16294. * \details
  16295. * **Type**: SIMD (RV64 Only)
  16296. *
  16297. * **Syntax**:\n
  16298. * ~~~
  16299. * SRA32 Rd, Rs1, Rs2
  16300. * SRA32.u Rd, Rs1, Rs2
  16301. * ~~~
  16302. *
  16303. * **Purpose**:\n
  16304. * Do 32-bit element arithmetic right shift operations simultaneously. The shift amount is a
  16305. * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
  16306. * results.
  16307. *
  16308. * **Description**:\n
  16309. * The 32-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
  16310. * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order
  16311. * 5-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is
  16312. * added to the most significant discarded bit of each 32-bit data element to calculate the final results.
  16313. * And the results are written to Rd.
  16314. *
  16315. * **Operations**:\n
  16316. * ~~~
  16317. * sa = Rs2[4:0];
  16318. * if (sa > 0) {
  16319. * if (`.u` form) { // SRA32.u
  16320. * res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1;
  16321. * Rd.W[x] = res[31:0];
  * } else { // SRA32
  16323. * Rd.W[x] = SE32(Rs1.W[x][31:sa])
  16324. * }
  16325. * } else {
  16326. * Rd = Rs1;
  16327. * }
  16328. * for RV64: x=1...0
  16329. * ~~~
  16330. *
  16331. * \param [in] a unsigned long type of value stored in a
  16332. * \param [in] b unsigned int type of value stored in b
  16333. * \return value stored in unsigned long type
  16334. */
  16335. __STATIC_FORCEINLINE unsigned long __RV_SRA32(unsigned long a, unsigned int b)
  16336. {
  16337. unsigned long result;
  16338. __ASM volatile("sra32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  16339. return result;
  16340. }
  16341. /* ===== Inline Function End for 4.35.1. SRA32 ===== */
  16342. /* ===== Inline Function Start for 4.35.2. SRA32.u ===== */
  16343. /**
  16344. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
  16345. * \brief SRA32.u (SIMD 32-bit Rounding Shift Right Arithmetic)
  16346. * \details
  16347. * **Type**: SIMD (RV64 Only)
  16348. *
  16349. * **Syntax**:\n
  16350. * ~~~
  16351. * SRA32 Rd, Rs1, Rs2
  16352. * SRA32.u Rd, Rs1, Rs2
  16353. * ~~~
  16354. *
  16355. * **Purpose**:\n
  16356. * Do 32-bit element arithmetic right shift operations simultaneously. The shift amount is a
  16357. * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
  16358. * results.
  16359. *
  16360. * **Description**:\n
  16361. * The 32-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
  16362. * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order
  16363. * 5-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is
  16364. * added to the most significant discarded bit of each 32-bit data element to calculate the final results.
  16365. * And the results are written to Rd.
  16366. *
  16367. * **Operations**:\n
  16368. * ~~~
  16369. * sa = Rs2[4:0];
  16370. * if (sa > 0) {
  16371. * if (`.u` form) { // SRA32.u
  16372. * res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1;
  16373. * Rd.W[x] = res[31:0];
  * } else { // SRA32
  16375. * Rd.W[x] = SE32(Rs1.W[x][31:sa])
  16376. * }
  16377. * } else {
  16378. * Rd = Rs1;
  16379. * }
  16380. * for RV64: x=1...0
  16381. * ~~~
  16382. *
  16383. * \param [in] a unsigned long type of value stored in a
  16384. * \param [in] b unsigned int type of value stored in b
  16385. * \return value stored in unsigned long type
  16386. */
  16387. __STATIC_FORCEINLINE unsigned long __RV_SRA32_U(unsigned long a, unsigned int b)
  16388. {
  16389. unsigned long result;
  16390. __ASM volatile("sra32.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  16391. return result;
  16392. }
  16393. /* ===== Inline Function End for 4.35.2. SRA32.u ===== */
  16394. /* ===== Inline Function Start for 4.36.1. SRAI32 ===== */
  16395. /**
  16396. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
  16397. * \brief SRAI32 (SIMD 32-bit Shift Right Arithmetic Immediate)
  16398. * \details
  16399. * **Type**: DSP (RV64 Only)
  16400. *
  16401. * **Syntax**:\n
  16402. * ~~~
  16403. * SRAI32 Rd, Rs1, imm5u
  16404. * SRAI32.u Rd, Rs1, imm5u
  16405. * ~~~
  16406. *
  16407. * **Purpose**:\n
  16408. * Do 32-bit elements arithmetic right shift operations simultaneously. The shift amount is
  16409. * an immediate value. The `.u` form performs additional rounding up operations on the shifted
  16410. * results.
  16411. *
  16412. * **Description**:\n
  16413. * The 32-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
  16414. * bits are filled with the sign-bit of the 32-bit data elements. The shift amount is specified by the
  16415. * imm5u constant. For the rounding operation of the `.u` form, a value of 1 is added to the most
  16416. * significant discarded bit of each 32-bit data to calculate the final results. And the results are written
  16417. * to Rd.
  16418. *
  16419. * **Operations**:\n
  16420. * ~~~
  16421. * sa = imm5u[4:0];
  16422. * if (sa > 0) {
  16423. * if (`.u` form) { // SRAI32.u
  16424. * res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1;
  16425. * Rd.W[x] = res[31:0];
  * } else { // SRAI32
  16427. * Rd.W[x] = SE32(Rs1.W[x][31:sa]);
  16428. * }
  16429. * } else {
  16430. * Rd = Rs1;
  16431. * }
  16432. * for RV64: x=1...0
  16433. * ~~~
  16434. *
  16435. * \param [in] a unsigned long type of value stored in a
  16436. * \param [in] b unsigned int type of value stored in b
  16437. * \return value stored in unsigned long type
  16438. */
  /*
   * SRAI32 wrapper (immediate shift amount).
   *
   * `a` is captured before any other macro-local is declared and the locals use
   * reserved-style names, so an argument expression that mentions a caller
   * variable called `result` is no longer shadowed by the macro's own temporary.
   * `b` is passed through the "K" immediate constraint and therefore has to be
   * a compile-time constant.
   */
  #define __RV_SRAI32(a, b) \
      ({ \
          unsigned long __rs1 = (unsigned long)(a); \
          unsigned long __rd; \
          __ASM volatile("srai32 %0, %1, %2" : "=r"(__rd) : "r"(__rs1), "K"(b)); \
          __rd; \
      })
  16446. /* ===== Inline Function End for 4.36.1. SRAI32 ===== */
  16447. /* ===== Inline Function Start for 4.36.2. SRAI32.u ===== */
  16448. /**
  16449. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
  16450. * \brief SRAI32.u (SIMD 32-bit Rounding Shift Right Arithmetic Immediate)
  16451. * \details
  16452. * **Type**: DSP (RV64 Only)
  16453. *
  16454. * **Syntax**:\n
  16455. * ~~~
  16456. * SRAI32 Rd, Rs1, imm5u
  16457. * SRAI32.u Rd, Rs1, imm5u
  16458. * ~~~
  16459. *
  16460. * **Purpose**:\n
  16461. * Do 32-bit elements arithmetic right shift operations simultaneously. The shift amount is
  16462. * an immediate value. The `.u` form performs additional rounding up operations on the shifted
  16463. * results.
  16464. *
  16465. * **Description**:\n
  16466. * The 32-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
  16467. * bits are filled with the sign-bit of the 32-bit data elements. The shift amount is specified by the
  16468. * imm5u constant. For the rounding operation of the `.u` form, a value of 1 is added to the most
  16469. * significant discarded bit of each 32-bit data to calculate the final results. And the results are written
  16470. * to Rd.
  16471. *
  16472. * **Operations**:\n
  16473. * ~~~
  16474. * sa = imm5u[4:0];
  16475. * if (sa > 0) {
  16476. * if (`.u` form) { // SRAI32.u
  16477. * res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1;
  16478. * Rd.W[x] = res[31:0];
  * } else { // SRAI32
  16480. * Rd.W[x] = SE32(Rs1.W[x][31:sa]);
  16481. * }
  16482. * } else {
  16483. * Rd = Rs1;
  16484. * }
  16485. * for RV64: x=1...0
  16486. * ~~~
  16487. *
  16488. * \param [in] a unsigned long type of value stored in a
  16489. * \param [in] b unsigned int type of value stored in b
  16490. * \return value stored in unsigned long type
  16491. */
  /*
   * SRAI32.u wrapper (rounding form, immediate shift amount).
   *
   * `a` is captured before any other macro-local is declared and the locals use
   * reserved-style names, so an argument expression that mentions a caller
   * variable called `result` is no longer shadowed by the macro's own temporary.
   * `b` is passed through the "K" immediate constraint and therefore has to be
   * a compile-time constant.
   */
  #define __RV_SRAI32_U(a, b) \
      ({ \
          unsigned long __rs1 = (unsigned long)(a); \
          unsigned long __rd; \
          __ASM volatile("srai32.u %0, %1, %2" : "=r"(__rd) : "r"(__rs1), "K"(b)); \
          __rd; \
      })
  16499. /* ===== Inline Function End for 4.36.2. SRAI32.u ===== */
  16500. /* ===== Inline Function Start for 4.37. SRAIW.u ===== */
  16501. /**
  16502. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_NON_SIMD_32B_SHIFT
  16503. * \brief SRAIW.u (Rounding Shift Right Arithmetic Immediate Word)
  16504. * \details
  16505. * **Type**: DSP (RV64 only)
  16506. *
  16507. * **Syntax**:\n
  16508. * ~~~
  16509. * SRAIW.u Rd, Rs1, imm5u
  16510. * ~~~
  16511. *
  16512. * **Purpose**:\n
  16513. * Perform a 32-bit arithmetic right shift operation with rounding. The shift amount is an
  16514. * immediate value.
  16515. *
  16516. * **Description**:\n
  16517. * This instruction right-shifts the lower 32-bit content of Rs1 arithmetically. The shifted
  16518. * out bits are filled with the sign-bit Rs1(31) and the shift amount is specified by the imm5u constant.
  16519. * For the rounding operation, a value of 1 is added to the most significant discarded bit of the data to
  16520. * calculate the final result. And the result is sign-extended and written to Rd.
  16521. *
  16522. * **Operations**:\n
  16523. * ~~~
  16524. * sa = imm5u;
  16525. * if (sa != 0) {
  16526. * res[31:-1] = SE33(Rs1[31:(sa-1)]) + 1;
  16527. * Rd = SE32(res[31:0]);
  16528. * } else {
  16529. * Rd = SE32(Rs1.W[0]);
  16530. * }
  16531. * ~~~
  16532. *
  16533. * \param [in] a int type of value stored in a
  16534. * \param [in] b unsigned int type of value stored in b
  16535. * \return value stored in long type
  16536. */
  /*
   * SRAIW.u wrapper (rounding 32-bit arithmetic right shift, immediate amount).
   *
   * `a` is converted to int and captured before any other macro-local is
   * declared, and the locals use reserved-style names, so an argument expression
   * that mentions a caller variable called `result` is no longer shadowed by the
   * macro's own temporary. `b` is passed through the "K" immediate constraint
   * and therefore has to be a compile-time constant. The value is of type long.
   */
  #define __RV_SRAIW_U(a, b) \
      ({ \
          int __rs1 = (int)(a); \
          long __rd; \
          __ASM volatile("sraiw.u %0, %1, %2" : "=r"(__rd) : "r"(__rs1), "K"(b)); \
          __rd; \
      })
  16544. /* ===== Inline Function End for 4.37. SRAIW.u ===== */
  16545. /* ===== Inline Function Start for 4.38.1. SRL32 ===== */
  16546. /**
  16547. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
  16548. * \brief SRL32 (SIMD 32-bit Shift Right Logical)
  16549. * \details
  16550. * **Type**: SIMD (RV64 Only)
  16551. *
  16552. * **Syntax**:\n
  16553. * ~~~
  16554. * SRL32 Rd, Rs1, Rs2
  16555. * SRL32.u Rd, Rs1, Rs2
  16556. * ~~~
  16557. *
  16558. * **Purpose**:\n
  16559. * Do 32-bit element logical right shift operations simultaneously. The shift amount is a
  16560. * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
  16561. * results.
  16562. *
  16563. * **Description**:\n
  16564. * The 32-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
  16565. * are filled with zero. The shift amount is specified by the low-order 5-bits of the value in the Rs2
  16566. * register. For the rounding operation of the `.u` form, a value of 1 is added to the most significant
  16567. * discarded bit of each 32-bit data element to calculate the final results. And the results are written to
  16568. * Rd.
  16569. *
  16570. * **Operations**:\n
  16571. * ~~~
  16572. * sa = Rs2[4:0];
  16573. * if (sa > 0) {
  * if (`.u` form) { // SRL32.u
  * res[31:-1] = ZE33(Rs1.W[x][31:sa-1]) + 1;
  * Rd.W[x] = res[31:0];
  * } else { // SRL32
  16578. * Rd.W[x] = ZE32(Rs1.W[x][31:sa])
  16579. * }
  16580. * } else {
  16581. * Rd = Rs1;
  16582. * }
  16583. * for RV64: x=1...0
  16584. * ~~~
  16585. *
  16586. * \param [in] a unsigned long type of value stored in a
  16587. * \param [in] b unsigned int type of value stored in b
  16588. * \return value stored in unsigned long type
  16589. */
  16590. __STATIC_FORCEINLINE unsigned long __RV_SRL32(unsigned long a, unsigned int b)
  16591. {
  16592. unsigned long result;
  16593. __ASM volatile("srl32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  16594. return result;
  16595. }
  16596. /* ===== Inline Function End for 4.38.1. SRL32 ===== */
  16597. /* ===== Inline Function Start for 4.38.2. SRL32.u ===== */
  16598. /**
  16599. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
  16600. * \brief SRL32.u (SIMD 32-bit Rounding Shift Right Logical)
  16601. * \details
  16602. * **Type**: SIMD (RV64 Only)
  16603. *
  16604. * **Syntax**:\n
  16605. * ~~~
  16606. * SRL32 Rd, Rs1, Rs2
  16607. * SRL32.u Rd, Rs1, Rs2
  16608. * ~~~
  16609. *
  16610. * **Purpose**:\n
  16611. * Do 32-bit element logical right shift operations simultaneously. The shift amount is a
  16612. * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
  16613. * results.
  16614. *
  16615. * **Description**:\n
  16616. * The 32-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
  16617. * are filled with zero. The shift amount is specified by the low-order 5-bits of the value in the Rs2
  16618. * register. For the rounding operation of the `.u` form, a value of 1 is added to the most significant
  16619. * discarded bit of each 32-bit data element to calculate the final results. And the results are written to
  16620. * Rd.
  16621. *
  16622. * **Operations**:\n
  16623. * ~~~
  16624. * sa = Rs2[4:0];
  16625. * if (sa > 0) {
  * if (`.u` form) { // SRL32.u
  * res[31:-1] = ZE33(Rs1.W[x][31:sa-1]) + 1;
  * Rd.W[x] = res[31:0];
  * } else { // SRL32
  16630. * Rd.W[x] = ZE32(Rs1.W[x][31:sa])
  16631. * }
  16632. * } else {
  16633. * Rd = Rs1;
  16634. * }
  16635. * for RV64: x=1...0
  16636. * ~~~
  16637. *
  16638. * \param [in] a unsigned long type of value stored in a
  16639. * \param [in] b unsigned int type of value stored in b
  16640. * \return value stored in unsigned long type
  16641. */
  16642. __STATIC_FORCEINLINE unsigned long __RV_SRL32_U(unsigned long a, unsigned int b)
  16643. {
  16644. unsigned long result;
  16645. __ASM volatile("srl32.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  16646. return result;
  16647. }
  16648. /* ===== Inline Function End for 4.38.2. SRL32.u ===== */
  16649. /* ===== Inline Function Start for 4.39.1. SRLI32 ===== */
  16650. /**
  16651. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
  16652. * \brief SRLI32 (SIMD 32-bit Shift Right Logical Immediate)
  16653. * \details
  16654. * **Type**: SIMD (RV64 Only)
  16655. *
  16656. * **Syntax**:\n
  16657. * ~~~
  16658. * SRLI32 Rd, Rs1, imm5u
  16659. * SRLI32.u Rd, Rs1, imm5u
  16660. * ~~~
  16661. *
  16662. * **Purpose**:\n
  16663. * Do 32-bit elements logical right shift operations simultaneously. The shift amount is an
  16664. * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
  16665. *
  16666. * **Description**:\n
  16667. * The 32-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
  16668. * are filled with zero. The shift amount is specified by the imm5u constant. For the rounding
  16669. * operation of the `.u` form, a value of 1 is added to the most significant discarded bit of each 32-bit
  16670. * data to calculate the final results. And the results are written to Rd.
  16671. *
  16672. * **Operations**:\n
  16673. * ~~~
  16674. * sa = imm5u[4:0];
  16675. * if (sa > 0) {
  16676. * if (`.u` form) { // SRLI32.u
  16677. * res[31:-1] = ZE33(Rs1.W[x][31:sa-1]) + 1;
  16678. * Rd.W[x] = res[31:0];
  * } else { // SRLI32
  16680. * Rd.W[x] = ZE32(Rs1.W[x][31:sa]);
  16681. * }
  16682. * } else {
  16683. * Rd = Rs1;
  16684. * }
  16685. * for RV64: x=1...0
  16686. * ~~~
  16687. *
  16688. * \param [in] a unsigned long type of value stored in a
  16689. * \param [in] b unsigned int type of value stored in b
  16690. * \return value stored in unsigned long type
  16691. */
  /*
   * SRLI32 wrapper (immediate shift amount).
   *
   * `a` is captured before any other macro-local is declared and the locals use
   * reserved-style names, so an argument expression that mentions a caller
   * variable called `result` is no longer shadowed by the macro's own temporary.
   * `b` is passed through the "K" immediate constraint and therefore has to be
   * a compile-time constant.
   */
  #define __RV_SRLI32(a, b) \
      ({ \
          unsigned long __rs1 = (unsigned long)(a); \
          unsigned long __rd; \
          __ASM volatile("srli32 %0, %1, %2" : "=r"(__rd) : "r"(__rs1), "K"(b)); \
          __rd; \
      })
  16699. /* ===== Inline Function End for 4.39.1. SRLI32 ===== */
  16700. /* ===== Inline Function Start for 4.39.2. SRLI32.u ===== */
  16701. /**
  16702. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
  16703. * \brief SRLI32.u (SIMD 32-bit Rounding Shift Right Logical Immediate)
  16704. * \details
  16705. * **Type**: SIMD (RV64 Only)
  16706. *
  16707. * **Syntax**:\n
  16708. * ~~~
  16709. * SRLI32 Rd, Rs1, imm5u
  16710. * SRLI32.u Rd, Rs1, imm5u
  16711. * ~~~
  16712. *
  16713. * **Purpose**:\n
  16714. * Do 32-bit elements logical right shift operations simultaneously. The shift amount is an
  16715. * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
  16716. *
  16717. * **Description**:\n
  16718. * The 32-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
  16719. * are filled with zero. The shift amount is specified by the imm5u constant. For the rounding
  16720. * operation of the `.u` form, a value of 1 is added to the most significant discarded bit of each 32-bit
  16721. * data to calculate the final results. And the results are written to Rd.
  16722. *
  16723. * **Operations**:\n
  16724. * ~~~
  16725. * sa = imm5u[4:0];
  16726. * if (sa > 0) {
  16727. * if (`.u` form) { // SRLI32.u
  16728. * res[31:-1] = ZE33(Rs1.W[x][31:sa-1]) + 1;
  16729. * Rd.W[x] = res[31:0];
  * } else { // SRLI32
  16731. * Rd.W[x] = ZE32(Rs1.W[x][31:sa]);
  16732. * }
  16733. * } else {
  16734. * Rd = Rs1;
  16735. * }
  16736. * for RV64: x=1...0
  16737. * ~~~
  16738. *
  16739. * \param [in] a unsigned long type of value stored in a
  * \param [in] b unsigned int type of value stored in b; must be a compile-time constant in the range 0 to 31 (imm5u)
  16741. * \return value stored in unsigned long type
  16742. */
  #define __RV_SRLI32_U(a, b) \
      ({ \
          /* Evaluate (a) before any other macro-local exists, and give the   */ \
          /* locals macro-specific names: with the previous `result`/`__a`    */ \
          /* locals, a call such as __RV_SRLI32_U(result, 3) read the macro's */ \
          /* own uninitialised `result` instead of the caller's variable.     */ \
          unsigned long __rv_srli32u_rs1 = (unsigned long)(a); \
          unsigned long __rv_srli32u_rd; \
          /* (b) is passed through the "K" constraint, so it has to be an     */ \
          /* immediate constant (imm5u in the instruction syntax).            */ \
          __ASM volatile("srli32.u %0, %1, %2" \
                         : "=r"(__rv_srli32u_rd) \
                         : "r"(__rv_srli32u_rs1), "K"(b)); \
          /* Value of the statement expression: the destination register.     */ \
          __rv_srli32u_rd; \
      })
  16750. /* ===== Inline Function End for 4.39.2. SRLI32.u ===== */
  16751. /* ===== Inline Function Start for 4.40. STAS32 ===== */
  16752. /**
  16753. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  16754. * \brief STAS32 (SIMD 32-bit Straight Addition & Subtraction)
  16755. * \details
  16756. * **Type**: SIMD (RV64 Only)
  16757. *
  16758. * **Syntax**:\n
  16759. * ~~~
  16760. * STAS32 Rd, Rs1, Rs2
  16761. * ~~~
  16762. *
  16763. * **Purpose**:\n
  16764. * Do 32-bit integer element addition and 32-bit integer element subtraction in a 64-bit
  16765. * chunk simultaneously. Operands are from corresponding 32-bit elements.
  16766. *
  16767. * **Description**:\n
  16768. * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit
  16769. * integer element in [63:32] of Rs2, and writes the result to [63:32] of Rd; at the same time, it subtracts
  16770. * the 32-bit integer element in [31:0] of Rs2 from the 32-bit integer element in [31:0] of Rs1, and
  16771. * writes the result to [31:0] of Rd.
  16772. *
  16773. * **Note**:\n
  16774. * This instruction can be used for either signed or unsigned operations.
  16775. *
  16776. * **Operations**:\n
  16777. * ~~~
  16778. * Rd.W[1] = Rs1.W[1] + Rs2.W[1];
  16779. * Rd.W[0] = Rs1.W[0] - Rs2.W[0];
  16780. * ~~~
  16781. *
  16782. * \param [in] a unsigned long type of value stored in a
  16783. * \param [in] b unsigned long type of value stored in b
  16784. * \return value stored in unsigned long type
  16785. */
  16786. __STATIC_FORCEINLINE unsigned long __RV_STAS32(unsigned long a, unsigned long b)
  16787. {
  16788. unsigned long result;
  16789. __ASM volatile("stas32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  16790. return result;
  16791. }
  16792. /* ===== Inline Function End for 4.40. STAS32 ===== */
  16793. /* ===== Inline Function Start for 4.41. STSA32 ===== */
  16794. /**
  16795. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  16796. * \brief STSA32 (SIMD 32-bit Straight Subtraction & Addition)
  16797. * \details
  16798. * **Type**: SIMD (RV64 Only)
  16799. *
  16800. * **Syntax**:\n
  16801. * ~~~
  16802. * STSA32 Rd, Rs1, Rs2
  16803. * ~~~
  16804. *
  16805. * **Purpose**:\n
  16806. * Do 32-bit integer element subtraction and 32-bit integer element addition in a 64-bit
  16807. * chunk simultaneously. Operands are from corresponding 32-bit elements.
  *
  * **Description**:\n
  * This instruction subtracts the 32-bit integer element in [63:32] of Rs2 from the 32-bit integer
  * element in [63:32] of Rs1, and writes the result to [63:32] of Rd; at the same time, it adds the 32-bit
  * integer element in [31:0] of Rs1 with the 32-bit integer element in [31:0] of Rs2, and writes the result
  * to [31:0] of Rd.
  16813. *
  16814. * **Note**:\n
  16815. * This instruction can be used for either signed or unsigned operations.
  16816. *
  16817. * **Operations**:\n
  16818. * ~~~
  16819. * Rd.W[1] = Rs1.W[1] - Rs2.W[1];
  16820. * Rd.W[0] = Rs1.W[0] + Rs2.W[0];
  16821. * ~~~
  16822. *
  16823. * \param [in] a unsigned long type of value stored in a
  16824. * \param [in] b unsigned long type of value stored in b
  16825. * \return value stored in unsigned long type
  16826. */
  16827. __STATIC_FORCEINLINE unsigned long __RV_STSA32(unsigned long a, unsigned long b)
  16828. {
  16829. unsigned long result;
  16830. __ASM volatile("stsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  16831. return result;
  16832. }
  16833. /* ===== Inline Function End for 4.41. STSA32 ===== */
  16834. /* ===== Inline Function Start for 4.42. SUB32 ===== */
  16835. /**
  16836. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  16837. * \brief SUB32 (SIMD 32-bit Subtraction)
  16838. * \details
  * **Type**: SIMD (RV64 Only)
  16840. *
  16841. * **Syntax**:\n
  16842. * ~~~
  16843. * SUB32 Rd, Rs1, Rs2
  16844. * ~~~
  16845. *
  16846. * **Purpose**:\n
  16847. * Do 32-bit integer element subtractions simultaneously.
  16848. *
  16849. * **Description**:\n
  16850. * This instruction subtracts the 32-bit integer elements in Rs2 from the 32-bit integer
  16851. * elements in Rs1, and then writes the results to Rd.
  16852. *
  16853. * **Note**:\n
  16854. * This instruction can be used for either signed or unsigned subtraction.
  16855. *
  16856. * **Operations**:\n
  16857. * ~~~
  16858. * Rd.W[x] = Rs1.W[x] - Rs2.W[x];
  16859. * for RV64: x=1...0
  16860. * ~~~
  16861. *
  16862. * \param [in] a unsigned long type of value stored in a
  16863. * \param [in] b unsigned long type of value stored in b
  16864. * \return value stored in unsigned long type
  16865. */
  16866. __STATIC_FORCEINLINE unsigned long __RV_SUB32(unsigned long a, unsigned long b)
  16867. {
  16868. unsigned long result;
  16869. __ASM volatile("sub32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  16870. return result;
  16871. }
  16872. /* ===== Inline Function End for 4.42. SUB32 ===== */
  16873. /* ===== Inline Function Start for 4.43. UKADD32 ===== */
  16874. /**
  16875. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  16876. * \brief UKADD32 (SIMD 32-bit Unsigned Saturating Addition)
  16877. * \details
  16878. * **Type**: SIMD (RV64 Only)
  16879. *
  16880. * **Syntax**:\n
  16881. * ~~~
  16882. * UKADD32 Rd, Rs1, Rs2
  16883. * ~~~
  16884. *
  16885. * **Purpose**:\n
  16886. * Do 32-bit unsigned integer element saturating additions simultaneously.
  16887. *
  16888. * **Description**:\n
  16889. * This instruction adds the 32-bit unsigned integer elements in Rs1 with the 32-bit
  16890. * unsigned integer elements in Rs2. If any of the results are beyond the 32-bit unsigned number
  16891. * range (0 <= RES <= 2^32-1), they are saturated to the range and the OV bit is set to 1. The saturated
  16892. * results are written to Rd.
  16893. *
  16894. * **Operations**:\n
  16895. * ~~~
  16896. * res[x] = Rs1.W[x] + Rs2.W[x];
  16897. * if (res[x] > (2^32)-1) {
  16898. * res[x] = (2^32)-1;
  16899. * OV = 1;
  16900. * }
  16901. * Rd.W[x] = res[x];
  16902. * for RV64: x=1...0
  16903. * ~~~
  16904. *
  16905. * \param [in] a unsigned long type of value stored in a
  16906. * \param [in] b unsigned long type of value stored in b
  16907. * \return value stored in unsigned long type
  16908. */
  16909. __STATIC_FORCEINLINE unsigned long __RV_UKADD32(unsigned long a, unsigned long b)
  16910. {
  16911. unsigned long result;
  16912. __ASM volatile("ukadd32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  16913. return result;
  16914. }
  16915. /* ===== Inline Function End for 4.43. UKADD32 ===== */
  16916. /* ===== Inline Function Start for 4.44. UKCRAS32 ===== */
  16917. /**
  16918. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  16919. * \brief UKCRAS32 (SIMD 32-bit Unsigned Saturating Cross Addition & Subtraction)
  16920. * \details
  16921. * **Type**: SIMD (RV64 Only)
  16922. *
  16923. * **Syntax**:\n
  16924. * ~~~
  16925. * UKCRAS32 Rd, Rs1, Rs2
  16926. * ~~~
  16927. *
  16928. * **Purpose**:\n
  16929. * Do one 32-bit unsigned integer element saturating addition and one 32-bit unsigned
  16930. * integer element saturating subtraction in a 64-bit chunk simultaneously. Operands are from crossed
  16931. * 32-bit elements.
  16932. *
  16933. * **Description**:\n
  16934. * This instruction adds the 32-bit unsigned integer element in [63:32] of Rs1 with the 32-
  16935. * bit unsigned integer element in [31:0] of Rs2; at the same time, it subtracts the 32-bit unsigned
  * integer element in [63:32] of Rs2 from the 32-bit unsigned integer element in [31:0] of Rs1. If any of the
  16937. * results are beyond the 32-bit unsigned number range (0 <= RES <= 2^32-1), they are saturated to the
  16938. * range and the OV bit is set to 1. The saturated results are written to [63:32] of Rd for addition and
  16939. * [31:0] of Rd for subtraction.
  16940. *
  16941. * **Operations**:\n
  16942. * ~~~
  16943. * res1 = Rs1.W[1] + Rs2.W[0];
  16944. * res2 = Rs1.W[0] - Rs2.W[1];
  16945. * if (res1 > (2^32)-1) {
  16946. * res1 = (2^32)-1;
  16947. * OV = 1;
  16948. * }
  16949. * if (res2 < 0) {
  16950. * res2 = 0;
  16951. * OV = 1;
  16952. * }
  16953. * Rd.W[1] = res1;
  16954. * Rd.W[0] = res2;
  16955. * ~~~
  16956. *
  16957. * \param [in] a unsigned long type of value stored in a
  16958. * \param [in] b unsigned long type of value stored in b
  16959. * \return value stored in unsigned long type
  16960. */
  16961. __STATIC_FORCEINLINE unsigned long __RV_UKCRAS32(unsigned long a, unsigned long b)
  16962. {
  16963. unsigned long result;
  16964. __ASM volatile("ukcras32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  16965. return result;
  16966. }
  16967. /* ===== Inline Function End for 4.44. UKCRAS32 ===== */
  16968. /* ===== Inline Function Start for 4.45. UKCRSA32 ===== */
  16969. /**
  16970. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  16971. * \brief UKCRSA32 (SIMD 32-bit Unsigned Saturating Cross Subtraction & Addition)
  16972. * \details
  16973. * **Type**: SIMD (RV64 Only)
  16974. *
  16975. * **Syntax**:\n
  16976. * ~~~
  16977. * UKCRSA32 Rd, Rs1, Rs2
  16978. * ~~~
  16979. *
  16980. * **Purpose**:\n
  16981. * Do one 32-bit unsigned integer element saturating subtraction and one 32-bit unsigned
  16982. * integer element saturating addition in a 64-bit chunk simultaneously. Operands are from crossed
  16983. * 32-bit elements.
  16984. *
  16985. * **Description**:\n
  16986. * This instruction subtracts the 32-bit unsigned integer element in [31:0] of Rs2 from the
  16987. * 32-bit unsigned integer element in [63:32] of Rs1; at the same time, it adds the 32-bit unsigned
  * integer element in [63:32] of Rs2 with the 32-bit unsigned integer element in [31:0] of Rs1. If any of the
  16989. * results are beyond the 32-bit unsigned number range (0 <= RES <= 2^32-1), they are saturated to the
  16990. * range and the OV bit is set to 1. The saturated results are written to [63:32] of Rd for subtraction and
  16991. * [31:0] of Rd for addition.
  16992. *
  16993. * **Operations**:\n
  16994. * ~~~
  16995. * res1 = Rs1.W[1] - Rs2.W[0];
  16996. * res2 = Rs1.W[0] + Rs2.W[1];
  * if (res1 < 0) {
  *   res1 = 0;
  *   OV = 1;
  * }
  * if (res2 > (2^32)-1) {
  *   res2 = (2^32)-1;
  *   OV = 1;
  * }
  17004. * Rd.W[1] = res1;
  17005. * Rd.W[0] = res2;
  17006. * ~~~
  17007. *
  17008. * \param [in] a unsigned long type of value stored in a
  17009. * \param [in] b unsigned long type of value stored in b
  17010. * \return value stored in unsigned long type
  17011. */
  17012. __STATIC_FORCEINLINE unsigned long __RV_UKCRSA32(unsigned long a, unsigned long b)
  17013. {
  17014. unsigned long result;
  17015. __ASM volatile("ukcrsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  17016. return result;
  17017. }
  17018. /* ===== Inline Function End for 4.45. UKCRSA32 ===== */
  17019. /* ===== Inline Function Start for 4.46. UKSTAS32 ===== */
  17020. /**
  17021. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  17022. * \brief UKSTAS32 (SIMD 32-bit Unsigned Saturating Straight Addition & Subtraction)
  17023. * \details
  17024. * **Type**: SIMD (RV64 Only)
  17025. *
  17026. * **Syntax**:\n
  17027. * ~~~
  17028. * UKSTAS32 Rd, Rs1, Rs2
  17029. * ~~~
  17030. *
  17031. * **Purpose**:\n
  17032. * Do one 32-bit unsigned integer element saturating addition and one 32-bit unsigned
  17033. * integer element saturating subtraction in a 64-bit chunk simultaneously. Operands are from
  17034. * corresponding 32-bit elements.
  17035. *
  17036. * **Description**:\n
  17037. * This instruction adds the 32-bit unsigned integer element in [63:32] of Rs1 with the 32-
  17038. * bit unsigned integer element in [63:32] of Rs2; at the same time, it subtracts the 32-bit unsigned
  * integer element in [31:0] of Rs2 from the 32-bit unsigned integer element in [31:0] of Rs1. If any of the
  17040. * results are beyond the 32-bit unsigned number range (0 <= RES <= 2^32-1), they are saturated to the
  17041. * range and the OV bit is set to 1. The saturated results are written to [63:32] of Rd for addition and
  17042. * [31:0] of Rd for subtraction.
  17043. *
  17044. * **Operations**:\n
  17045. * ~~~
  17046. * res1 = Rs1.W[1] + Rs2.W[1];
  17047. * res2 = Rs1.W[0] - Rs2.W[0];
  17048. * if (res1 > (2^32)-1) {
  17049. * res1 = (2^32)-1;
  17050. * OV = 1;
  17051. * }
  17052. * if (res2 < 0) {
  17053. * res2 = 0;
  17054. * OV = 1;
  17055. * }
  17056. * Rd.W[1] = res1;
  17057. * Rd.W[0] = res2;
  17058. * ~~~
  17059. *
  17060. * \param [in] a unsigned long type of value stored in a
  17061. * \param [in] b unsigned long type of value stored in b
  17062. * \return value stored in unsigned long type
  17063. */
  17064. __STATIC_FORCEINLINE unsigned long __RV_UKSTAS32(unsigned long a, unsigned long b)
  17065. {
  17066. unsigned long result;
  17067. __ASM volatile("ukstas32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  17068. return result;
  17069. }
  17070. /* ===== Inline Function End for 4.46. UKSTAS32 ===== */
  17071. /* ===== Inline Function Start for 4.47. UKSTSA32 ===== */
  17072. /**
  17073. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  17074. * \brief UKSTSA32 (SIMD 32-bit Unsigned Saturating Straight Subtraction & Addition)
  17075. * \details
  17076. * **Type**: SIMD (RV64 Only)
  17077. *
  17078. * **Syntax**:\n
  17079. * ~~~
  17080. * UKSTSA32 Rd, Rs1, Rs2
  17081. * ~~~
  17082. *
  17083. * **Purpose**:\n
  17084. * Do one 32-bit unsigned integer element saturating subtraction and one 32-bit unsigned
  17085. * integer element saturating addition in a 64-bit chunk simultaneously. Operands are from
  17086. * corresponding 32-bit elements.
  17087. *
  17088. * **Description**:\n
  17089. * This instruction subtracts the 32-bit unsigned integer element in [63:32] of Rs2 from
  17090. * the 32-bit unsigned integer element in [63:32] of Rs1; at the same time, it adds the 32-bit unsigned
  * integer element in [31:0] of Rs2 with the 32-bit unsigned integer element in [31:0] of Rs1. If any of the
  17092. * results are beyond the 32-bit unsigned number range (0 <= RES <= 2^32-1), they are saturated to the
  17093. * range and the OV bit is set to 1. The saturated results are written to [63:32] of Rd for subtraction and
  17094. * [31:0] of Rd for addition.
  17095. *
  17096. * **Operations**:\n
  17097. * ~~~
  17098. * res1 = Rs1.W[1] - Rs2.W[1];
  17099. * res2 = Rs1.W[0] + Rs2.W[0];
  * if (res1 < 0) {
  *   res1 = 0;
  *   OV = 1;
  * }
  * if (res2 > (2^32)-1) {
  *   res2 = (2^32)-1;
  *   OV = 1;
  * }
  17107. * Rd.W[1] = res1;
  17108. * Rd.W[0] = res2;
  17109. * ~~~
  17110. *
  17111. * \param [in] a unsigned long type of value stored in a
  17112. * \param [in] b unsigned long type of value stored in b
  17113. * \return value stored in unsigned long type
  17114. */
  17115. __STATIC_FORCEINLINE unsigned long __RV_UKSTSA32(unsigned long a, unsigned long b)
  17116. {
  17117. unsigned long result;
  17118. __ASM volatile("ukstsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  17119. return result;
  17120. }
  17121. /* ===== Inline Function End for 4.47. UKSTSA32 ===== */
  17122. /* ===== Inline Function Start for 4.48. UKSUB32 ===== */
  17123. /**
  17124. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  17125. * \brief UKSUB32 (SIMD 32-bit Unsigned Saturating Subtraction)
  17126. * \details
  17127. * **Type**: SIMD (RV64 Only)
  17128. *
  17129. * **Syntax**:\n
  17130. * ~~~
  17131. * UKSUB32 Rd, Rs1, Rs2
  17132. * ~~~
  17133. *
  17134. * **Purpose**:\n
  17135. * Do 32-bit unsigned integer elements saturating subtractions simultaneously.
  17136. *
  17137. * **Description**:\n
  17138. * This instruction subtracts the 32-bit unsigned integer elements in Rs2 from the 32-bit
  17139. * unsigned integer elements in Rs1. If any of the results are beyond the 32-bit unsigned number
  17140. * range (0 <= RES <= 2^32-1), they are saturated to the range and the OV bit is set to 1. The saturated
  17141. * results are written to Rd.
  17142. *
  17143. * **Operations**:\n
  17144. * ~~~
  17145. * res[x] = Rs1.W[x] - Rs2.W[x];
  17146. * if (res[x] < 0) {
  17147. * res[x] = 0;
  17148. * OV = 1;
  17149. * }
  17150. * Rd.W[x] = res[x];
  17151. * for RV64: x=1...0
  17152. * ~~~
  17153. *
  17154. * \param [in] a unsigned long type of value stored in a
  17155. * \param [in] b unsigned long type of value stored in b
  17156. * \return value stored in unsigned long type
  17157. */
  17158. __STATIC_FORCEINLINE unsigned long __RV_UKSUB32(unsigned long a, unsigned long b)
  17159. {
  17160. unsigned long result;
  17161. __ASM volatile("uksub32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  17162. return result;
  17163. }
  17164. /* ===== Inline Function End for 4.48. UKSUB32 ===== */
  17165. /* ===== Inline Function Start for 4.49. UMAX32 ===== */
  17166. /**
  17167. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC
  17168. * \brief UMAX32 (SIMD 32-bit Unsigned Maximum)
  17169. * \details
  17170. * **Type**: SIMD (RV64 Only)
  17171. *
  17172. * **Syntax**:\n
  17173. * ~~~
  17174. * UMAX32 Rd, Rs1, Rs2
  17175. * ~~~
  17176. *
  17177. * **Purpose**:\n
  17178. * Do 32-bit unsigned integer elements finding maximum operations simultaneously.
  17179. *
  17180. * **Description**:\n
  17181. * This instruction compares the 32-bit unsigned integer elements in Rs1 with the 32-bit
  * unsigned integer elements in Rs2 and selects, for each pair, the number that is greater than the other one. The
  17183. * selected results are written to Rd.
  17184. *
  17185. * **Operations**:\n
  17186. * ~~~
  17187. * Rd.W[x] = (Rs1.W[x] u> Rs2.W[x])? Rs1.W[x] : Rs2.W[x];
  17188. * for RV64: x=1...0
  17189. * ~~~
  17190. *
  17191. * \param [in] a unsigned long type of value stored in a
  17192. * \param [in] b unsigned long type of value stored in b
  17193. * \return value stored in unsigned long type
  17194. */
  17195. __STATIC_FORCEINLINE unsigned long __RV_UMAX32(unsigned long a, unsigned long b)
  17196. {
  17197. unsigned long result;
  17198. __ASM volatile("umax32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  17199. return result;
  17200. }
  17201. /* ===== Inline Function End for 4.49. UMAX32 ===== */
  17202. /* ===== Inline Function Start for 4.50. UMIN32 ===== */
  17203. /**
  17204. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC
  17205. * \brief UMIN32 (SIMD 32-bit Unsigned Minimum)
  17206. * \details
  17207. * **Type**: SIMD (RV64 Only)
  17208. *
  17209. * **Syntax**:\n
  17210. * ~~~
  17211. * UMIN32 Rd, Rs1, Rs2
  17212. * ~~~
  17213. *
  17214. * **Purpose**:\n
  17215. * Do 32-bit unsigned integer elements finding minimum operations simultaneously.
  17216. *
  17217. * **Description**:\n
  17218. * This instruction compares the 32-bit unsigned integer elements in Rs1 with the 32-bit
  * unsigned integer elements in Rs2 and selects, for each pair, the number that is less than the other one. The
  17220. * selected results are written to Rd.
  17221. *
  17222. * **Operations**:\n
  17223. * ~~~
  17224. * Rd.W[x] = (Rs1.W[x] <u Rs2.W[x])? Rs1.W[x] : Rs2.W[x];
  17225. * for RV64: x=1...0
  17226. * ~~~
  17227. *
  17228. * \param [in] a unsigned long type of value stored in a
  17229. * \param [in] b unsigned long type of value stored in b
  17230. * \return value stored in unsigned long type
  17231. */
  17232. __STATIC_FORCEINLINE unsigned long __RV_UMIN32(unsigned long a, unsigned long b)
  17233. {
  17234. unsigned long result;
  17235. __ASM volatile("umin32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  17236. return result;
  17237. }
  17238. /* ===== Inline Function End for 4.50. UMIN32 ===== */
  17239. /* ===== Inline Function Start for 4.51. URADD32 ===== */
  17240. /**
  17241. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  17242. * \brief URADD32 (SIMD 32-bit Unsigned Halving Addition)
  17243. * \details
  17244. * **Type**: SIMD (RV64 Only)
  17245. *
  17246. * **Syntax**:\n
  17247. * ~~~
  17248. * URADD32 Rd, Rs1, Rs2
  17249. * ~~~
  17250. *
  17251. * **Purpose**:\n
  17252. * Do 32-bit unsigned integer element additions simultaneously. The results are halved to
  17253. * avoid overflow or saturation.
  17254. *
  17255. * **Description**:\n
  17256. * This instruction adds the 32-bit unsigned integer elements in Rs1 with the 32-bit
  17257. * unsigned integer elements in Rs2. The results are first logically right-shifted by 1 bit and then
  17258. * written to Rd.
  17259. *
  17260. * **Examples**:\n
  17261. * ~~~
  * * Ra = 0x7FFFFFFF, Rb = 0x7FFFFFFF, Rt = 0x7FFFFFFF
  * * Ra = 0x80000000, Rb = 0x80000000, Rt = 0x80000000
  * * Ra = 0x40000000, Rb = 0x80000000, Rt = 0x60000000
  17265. * ~~~
  17266. *
  17267. * **Operations**:\n
  17268. * ~~~
  17269. * Rd.W[x] = (Rs1.W[x] + Rs2.W[x]) u>> 1;
  17270. * for RV64: x=1...0
  17271. * ~~~
  17272. *
  17273. * \param [in] a unsigned long type of value stored in a
  17274. * \param [in] b unsigned long type of value stored in b
  17275. * \return value stored in unsigned long type
  17276. */
  17277. __STATIC_FORCEINLINE unsigned long __RV_URADD32(unsigned long a, unsigned long b)
  17278. {
  17279. unsigned long result;
  17280. __ASM volatile("uradd32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  17281. return result;
  17282. }
  17283. /* ===== Inline Function End for 4.51. URADD32 ===== */
  17284. /* ===== Inline Function Start for 4.52. URCRAS32 ===== */
  17285. /**
  17286. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  17287. * \brief URCRAS32 (SIMD 32-bit Unsigned Halving Cross Addition & Subtraction)
  17288. * \details
  17289. * **Type**: SIMD (RV64 Only)
  17290. *
  17291. * **Syntax**:\n
  17292. * ~~~
  17293. * URCRAS32 Rd, Rs1, Rs2
  17294. * ~~~
  17295. *
  17296. * **Purpose**:\n
  17297. * Do 32-bit unsigned integer element addition and 32-bit unsigned integer element
  17298. * subtraction in a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements. The
  17299. * results are halved to avoid overflow or saturation.
  17300. *
  17301. * **Description**:\n
  17302. * This instruction adds the 32-bit unsigned integer element in [63:32] of Rs1 with the 32-
  17303. * bit unsigned integer element in [31:0] of Rs2, and subtracts the 32-bit unsigned integer element in
  17304. * [63:32] of Rs2 from the 32-bit unsigned integer element in [31:0] of Rs1. The element results are first
  17305. * logically right-shifted by 1 bit and then written to [63:32] of Rd for addition and [31:0] of Rd for
  17306. * subtraction.
  17307. *
  17308. * **Examples**:\n
  17309. * ~~~
  17310. * Please see `URADD32` and `URSUB32` instructions.
  17311. * ~~~
  17312. *
  17313. * **Operations**:\n
  17314. * ~~~
  17315. * Rd.W[1] = (Rs1.W[1] + Rs2.W[0]) u>> 1;
  17316. * Rd.W[0] = (Rs1.W[0] - Rs2.W[1]) u>> 1;
  17317. * ~~~
  17318. *
  17319. * \param [in] a unsigned long type of value stored in a
  17320. * \param [in] b unsigned long type of value stored in b
  17321. * \return value stored in unsigned long type
  17322. */
  17323. __STATIC_FORCEINLINE unsigned long __RV_URCRAS32(unsigned long a, unsigned long b)
  17324. {
  17325. unsigned long result;
  17326. __ASM volatile("urcras32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  17327. return result;
  17328. }
  17329. /* ===== Inline Function End for 4.52. URCRAS32 ===== */
  17330. /* ===== Inline Function Start for 4.53. URCRSA32 ===== */
  17331. /**
  17332. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  17333. * \brief URCRSA32 (SIMD 32-bit Unsigned Halving Cross Subtraction & Addition)
  17334. * \details
  17335. * **Type**: SIMD (RV64 Only)
  17336. *
  17337. * **Syntax**:\n
  17338. * ~~~
  17339. * URCRSA32 Rd, Rs1, Rs2
  17340. * ~~~
  17341. *
  17342. * **Purpose**:\n
  17343. * Do 32-bit unsigned integer element subtraction and 32-bit unsigned integer element
  17344. * addition in a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements. The results
  17345. * are halved to avoid overflow or saturation.
  17346. *
  17347. * **Description**:\n
  17348. * This instruction subtracts the 32-bit unsigned integer element in [31:0] of Rs2 from the
  * 32-bit unsigned integer element in [63:32] of Rs1, and adds the 32-bit unsigned integer element in
  17350. * [31:0] of Rs1 with the 32-bit unsigned integer element in [63:32] of Rs2. The two results are first
  17351. * logically right-shifted by 1 bit and then written to [63:32] of Rd for subtraction and [31:0] of Rd for
  17352. * addition.
  17353. *
  17354. * **Examples**:\n
  17355. * ~~~
  17356. * Please see `URADD32` and `URSUB32` instructions.
  17357. * ~~~
  17358. *
  17359. * **Operations**:\n
  17360. * ~~~
  17361. * Rd.W[1] = (Rs1.W[1] - Rs2.W[0]) u>> 1;
  17362. * Rd.W[0] = (Rs1.W[0] + Rs2.W[1]) u>> 1;
  17363. * ~~~
  17364. *
  17365. * \param [in] a unsigned long type of value stored in a
  17366. * \param [in] b unsigned long type of value stored in b
  17367. * \return value stored in unsigned long type
  17368. */
  17369. __STATIC_FORCEINLINE unsigned long __RV_URCRSA32(unsigned long a, unsigned long b)
  17370. {
  17371. unsigned long result;
  17372. __ASM volatile("urcrsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  17373. return result;
  17374. }
  17375. /* ===== Inline Function End for 4.53. URCRSA32 ===== */
  17376. /* ===== Inline Function Start for 4.54. URSTAS32 ===== */
  17377. /**
  17378. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  17379. * \brief URSTAS32 (SIMD 32-bit Unsigned Halving Straight Addition & Subtraction)
  17380. * \details
  17381. * **Type**: SIMD (RV64 Only)
  17382. *
  17383. * **Syntax**:\n
  17384. * ~~~
  17385. * URSTAS32 Rd, Rs1, Rs2
  17386. * ~~~
  17387. *
  17388. * **Purpose**:\n
  17389. * Do 32-bit unsigned integer element addition and 32-bit unsigned integer element
  17390. * subtraction in a 64-bit chunk simultaneously. Operands are from corresponding 32-bit elements.
  17391. * The results are halved to avoid overflow or saturation.
  17392. *
  17393. * **Description**:\n
  17394. * This instruction adds the 32-bit unsigned integer element in [63:32] of Rs1 with the 32-
  17395. * bit unsigned integer element in [63:32] of Rs2, and subtracts the 32-bit unsigned integer element in
  17396. * [31:0] of Rs2 from the 32-bit unsigned integer element in [31:0] of Rs1. The element results are first
  17397. * logically right-shifted by 1 bit and then written to [63:32] of Rd for addition and [31:0] of Rd for
  17398. * subtraction.
  17399. *
  17400. * **Examples**:\n
  17401. * ~~~
  17402. * Please see `URADD32` and `URSUB32` instructions.
  17403. * ~~~
  17404. *
  17405. * **Operations**:\n
  17406. * ~~~
  17407. * Rd.W[1] = (Rs1.W[1] + Rs2.W[1]) u>> 1;
  17408. * Rd.W[0] = (Rs1.W[0] - Rs2.W[0]) u>> 1;
  17409. * ~~~
  17410. *
  17411. * \param [in] a unsigned long type of value stored in a
  17412. * \param [in] b unsigned long type of value stored in b
  17413. * \return value stored in unsigned long type
  17414. */
  17415. __STATIC_FORCEINLINE unsigned long __RV_URSTAS32(unsigned long a, unsigned long b)
  17416. {
  17417. unsigned long result;
  17418. __ASM volatile("urstas32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  17419. return result;
  17420. }
  17421. /* ===== Inline Function End for 4.54. URSTAS32 ===== */
  17422. /* ===== Inline Function Start for 4.55. URSTSA32 ===== */
  17423. /**
  17424. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  17425. * \brief URSTSA32 (SIMD 32-bit Unsigned Halving Straight Subtraction & Addition)
  17426. * \details
  17427. * **Type**: SIMD (RV64 Only)
  17428. *
  17429. * **Syntax**:\n
  17430. * ~~~
  17431. * URSTSA32 Rd, Rs1, Rs2
  17432. * ~~~
  17433. *
  17434. * **Purpose**:\n
  17435. * Do 32-bit unsigned integer element subtraction and 32-bit unsigned integer element
  17436. * addition in a 64-bit chunk simultaneously. Operands are from corresponding 32-bit elements. The
  17437. * results are halved to avoid overflow or saturation.
  17438. *
  17439. * **Description**:\n
  17440. * This instruction subtracts the 32-bit unsigned integer element in [63:32] of Rs2 from
  * the 32-bit unsigned integer element in [63:32] of Rs1, and adds the 32-bit unsigned integer element
  17442. * in [31:0] of Rs1 with the 32-bit unsigned integer element in [31:0] of Rs2. The two results are first
  17443. * logically right-shifted by 1 bit and then written to [63:32] of Rd for subtraction and [31:0] of Rd for
  17444. * addition.
  17445. *
  17446. * **Examples**:\n
  17447. * ~~~
  17448. * Please see `URADD32` and `URSUB32` instructions.
  17449. * ~~~
  17450. *
  17451. * **Operations**:\n
  17452. * ~~~
  17453. * Rd.W[1] = (Rs1.W[1] - Rs2.W[1]) u>> 1;
  17454. * Rd.W[0] = (Rs1.W[0] + Rs2.W[0]) u>> 1;
  17455. * ~~~
  17456. *
  17457. * \param [in] a unsigned long type of value stored in a
  17458. * \param [in] b unsigned long type of value stored in b
  17459. * \return value stored in unsigned long type
  17460. */
  17461. __STATIC_FORCEINLINE unsigned long __RV_URSTSA32(unsigned long a, unsigned long b)
  17462. {
  17463. unsigned long result;
  17464. __ASM volatile("urstsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  17465. return result;
  17466. }
  17467. /* ===== Inline Function End for 4.55. URSTSA32 ===== */
  17468. /* ===== Inline Function Start for 4.56. URSUB32 ===== */
  17469. /**
  17470. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  17471. * \brief URSUB32 (SIMD 32-bit Unsigned Halving Subtraction)
  17472. * \details
  17473. * **Type**: SIMD (RV64 Only)
  17474. *
  17475. * **Syntax**:\n
  17476. * ~~~
  17477. * URSUB32 Rd, Rs1, Rs2
  17478. * ~~~
  17479. *
  17480. * **Purpose**:\n
  17481. * Do 32-bit unsigned integer element subtractions simultaneously. The results are halved to
  17482. * avoid overflow or saturation.
  17483. *
  17484. * **Description**:\n
  17485. * This instruction subtracts the 32-bit unsigned integer elements in Rs2 from the 32-bit
  17486. * unsigned integer elements in Rs1. The results are first logically right-shifted by 1 bit and then
  17487. * written to Rd.
  17488. *
  17489. * **Examples**:\n
  17490. * ~~~
  17491. * * Ra = 0x7FFFFFFF, Rb = 0x80000000, Rt = 0xFFFFFFFF
  17492. * * Ra = 0x80000000, Rb = 0x7FFFFFFF, Rt = 0x00000000
  17493. * * Ra = 0x80000000, Rb = 0x40000000, Rt = 0x20000000
  17494. * ~~~
  17495. *
  17496. * **Operations**:\n
  17497. * ~~~
  17498. * Rd.W[x] = (Rs1.W[x] - Rs2.W[x]) u>> 1;
  17499. * for RV64: x=1...0
  17500. * ~~~
  17501. *
  17502. * \param [in] a unsigned long type of value stored in a
  17503. * \param [in] b unsigned long type of value stored in b
  17504. * \return value stored in unsigned long type
  17505. */
  17506. __STATIC_FORCEINLINE unsigned long __RV_URSUB32(unsigned long a, unsigned long b)
  17507. {
  17508. unsigned long result;
  17509. __ASM volatile("ursub32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  17510. return result;
  17511. }
  17512. /* ===== Inline Function End for 4.56. URSUB32 ===== */
  17513. #endif /* __RISCV_XLEN == 64 */
  17514. /**
  17515. * \defgroup NMSIS_Core_DSP_Intrinsic_NUCLEI_Default Nuclei Default SIMD DSP Additional Instructions
  17516. * \ingroup NMSIS_Core_DSP_Intrinsic
  17517. * \brief (RV32 & RV64)Nuclei Customized DSP Instructions
  17518. * \details This is Nuclei customized DSP instructions for both RV32 and RV64
  17519. */
  17520. /* ===== Inline Function Start for EXPD80 ===== */
  17521. /**
  17522. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_Default
  17523. * \brief EXPD80 (Expand and Copy Byte 0 to 32bit(when rv32) or 64bit(when rv64))
  17524. * \details
  17525. * **Type**: DSP
  17526. *
  17527. * **Syntax**:\n
  17528. * ~~~
  17529. * EXPD80 Rd, Rs1
  17530. * ~~~
  17531. *
  17532. * **Purpose**:\n
  17533. * When rv32, Copy 8-bit data from 32-bit chunks into 4 bytes in a register.
  17534. * When rv64, Copy 8-bit data from 64-bit chunks into 8 bytes in a register.
  17535. *
  17536. * **Description**:\n
  17537. * Moves Rs1.B[0][7:0] to Rd.[0][7:0], Rd.[1][7:0], Rd.[2][7:0], Rd.[3][7:0]
  17538. *
  17539. * **Operations**:\n
  17540. * ~~~
  17541. * Rd.W[x][31:0] = CONCAT(Rs1.B[0][7:0], Rs1.B[0][7:0], Rs1.B[0][7:0], Rs1.B[0][7:0]);
  17542. * for RV32: x=0
  17543. * ~~~
  17544. *
  17545. * \param [in] a unsigned long type of value stored in a
  17546. * \return value stored in unsigned long type
  17547. */
  17548. __STATIC_FORCEINLINE unsigned long __RV_EXPD80(unsigned long a)
  17549. {
  17550. unsigned long result;
  17551. __ASM volatile("expd80 %0, %1" : "=r"(result) : "r"(a));
  17552. return result;
  17553. }
  17554. /* ===== Inline Function End for EXPD80 ===== */
  17555. /* ===== Inline Function Start for EXPD81 ===== */
  17556. /**
  17557. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_Default
  17558. * \brief EXPD81 (Expand and Copy Byte 1 to 32bit(rv32) or 64bit(when rv64))
  17559. * \details
  17560. * **Type**: DSP
  17561. *
  17562. * **Syntax**:\n
  17563. * ~~~
  17564. * EXPD81 Rd, Rs1
  17565. * ~~~
  17566. *
  17567. * **Purpose**:\n
  17568. * Copy 8-bit data from 32-bit chunks into 4 bytes in a register.
  17569. *
  17570. * **Description**:\n
  17571. * Moves Rs1.B[1][7:0] to Rd.[0][7:0], Rd.[1][7:0], Rd.[2][7:0], Rd.[3][7:0]
  17572. *
  17573. * **Operations**:\n
  17574. * ~~~
  17575. * Rd.W[x][31:0] = CONCAT(Rs1.B[1][7:0], Rs1.B[1][7:0], Rs1.B[1][7:0], Rs1.B[1][7:0]);
  17576. * for RV32: x=0
  17577. * ~~~
  17578. *
  17579. * \param [in] a unsigned long type of value stored in a
  17580. * \return value stored in unsigned long type
  17581. */
  17582. __STATIC_FORCEINLINE unsigned long __RV_EXPD81(unsigned long a)
  17583. {
  17584. unsigned long result;
  17585. __ASM volatile("expd81 %0, %1" : "=r"(result) : "r"(a));
  17586. return result;
  17587. }
  17588. /* ===== Inline Function End for EXPD81 ===== */
  17589. /* ===== Inline Function Start for EXPD82 ===== */
  17590. /**
  17591. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_Default
  17592. * \brief EXPD82 (Expand and Copy Byte 2 to 32bit(rv32) or 64bit(when rv64))
  17593. * \details
  17594. * **Type**: DSP
  17595. *
  17596. * **Syntax**:\n
  17597. * ~~~
  17598. * EXPD82 Rd, Rs1
  17599. * ~~~
  17600. *
  17601. * **Purpose**:\n
  17602. * Copy 8-bit data from 32-bit chunks into 4 bytes in a register.
  17603. *
  17604. * **Description**:\n
  17605. * Moves Rs1.B[2][7:0] to Rd.[0][7:0], Rd.[1][7:0], Rd.[2][7:0], Rd.[3][7:0]
  17606. *
  17607. * **Operations**:\n
  17608. * ~~~
  17609. * Rd.W[x][31:0] = CONCAT(Rs1.B[2][7:0], Rs1.B[2][7:0], Rs1.B[2][7:0], Rs1.B[2][7:0]);
  17610. * for RV32: x=0
  17611. * ~~~
  17612. *
  17613. * \param [in] a unsigned long type of value stored in a
  17614. * \return value stored in unsigned long type
  17615. */
  17616. __STATIC_FORCEINLINE unsigned long __RV_EXPD82(unsigned long a)
  17617. {
  17618. unsigned long result;
  17619. __ASM volatile("expd82 %0, %1" : "=r"(result) : "r"(a));
  17620. return result;
  17621. }
  17622. /* ===== Inline Function End for EXPD82 ===== */
  17623. /* ===== Inline Function Start for EXPD83 ===== */
  17624. /**
  17625. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_Default
  17626. * \brief EXPD83 (Expand and Copy Byte 3 to 32bit(rv32) or 64bit(when rv64))
  17627. * \details
  17628. * **Type**: DSP
  17629. *
  17630. * **Syntax**:\n
  17631. * ~~~
  17632. * EXPD83 Rd, Rs1
  17633. * ~~~
  17634. *
  17635. * **Purpose**:\n
  17636. * Copy 8-bit data from 32-bit chunks into 4 bytes in a register.
  17637. *
  17638. * **Description**:\n
  17639. * Moves Rs1.B[3][7:0] to Rd.[0][7:0], Rd.[1][7:0], Rd.[2][7:0], Rd.[3][7:0]
  17640. *
  17641. * **Operations**:\n
  17642. * ~~~
  17643. * Rd.W[x][31:0] = CONCAT(Rs1.B[3][7:0], Rs1.B[3][7:0], Rs1.B[3][7:0], Rs1.B[3][7:0]);
  17644. * for RV32: x=0
  17645. * ~~~
  17646. *
  17647. * \param [in] a unsigned long type of value stored in a
  17648. * \return value stored in unsigned long type
  17649. */
  17650. __STATIC_FORCEINLINE unsigned long __RV_EXPD83(unsigned long a)
  17651. {
  17652. unsigned long result;
  17653. __ASM volatile("expd83 %0, %1" : "=r"(result) : "r"(a));
  17654. return result;
  17655. }
  17656. /* ===== Inline Function End for EXPD83 ===== */
  17657. #if (__RISCV_XLEN == 64)
  17658. /* ===== Inline Function Start for EXPD84 ===== */
  17659. /**
  17660. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_Default
  17661. * \brief EXPD84 (Expand and Copy Byte 4 to 64bit)
  17662. * \details
  17663. * **Type**: DSP
  17664. *
  17665. * **Syntax**:\n
  17666. * ~~~
  17667. * EXPD84 Rd, Rs1
  17668. * ~~~
  17669. *
  17670. * **Purpose**:\n
  17671. * Only RV64, copy 8-bit data from 64-bit chunks into 8 bytes in a register.
  17672. *
  17673. * **Description**:\n
  17674. * Moves Rs1.B[4][7:0] to every byte of Rd, i.e. Rd.B[x][7:0] for x=7...0
  17675. *
  17676. * **Operations**:\n
  17677. * ~~~
  17678. * Rd.B[x][7:0] = Rs1.B[4][7:0];
  17679. * for RV64: x=7...0
  17680. * ~~~
  17681. *
  17682. * \param [in] a unsigned long type of value stored in a
  17683. * \return value stored in unsigned long type
  17684. */
  17685. __STATIC_FORCEINLINE unsigned long __RV_EXPD84(unsigned long a)
  17686. {
  17687. unsigned long result;
  17688. __ASM volatile("expd84 %0, %1" : "=r"(result) : "r"(a));
  17689. return result;
  17690. }
  17691. /* ===== Inline Function End for EXPD84 ===== */
  17692. /* ===== Inline Function Start for EXPD85 ===== */
  17693. /**
  17694. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_Default
  17695. * \brief EXPD85 (Expand and Copy Byte 5 to 64bit)
  17696. * \details
  17697. * **Type**: DSP
  17698. *
  17699. * **Syntax**:\n
  17700. * ~~~
  17701. * EXPD85 Rd, Rs1
  17702. * ~~~
  17703. *
  17704. * **Purpose**:\n
  17705. * Only RV64, copy 8-bit data from 64-bit chunks into 8 bytes in a register.
  17706. *
  17707. * **Description**:\n
  17708. * Moves Rs1.B[5][7:0] to every byte of Rd, i.e. Rd.B[x][7:0] for x=7...0
  17709. *
  17710. * **Operations**:\n
  17711. * ~~~
  17712. * Rd.B[x][7:0] = Rs1.B[5][7:0];
  17713. * for RV64: x=7...0
  17714. * ~~~
  17715. *
  17716. * \param [in] a unsigned long type of value stored in a
  17717. * \return value stored in unsigned long type
  17718. */
  17719. __STATIC_FORCEINLINE unsigned long __RV_EXPD85(unsigned long a)
  17720. {
  17721. unsigned long result;
  17722. __ASM volatile("expd85 %0, %1" : "=r"(result) : "r"(a));
  17723. return result;
  17724. }
  17725. /* ===== Inline Function End for EXPD85 ===== */
  17726. /* ===== Inline Function Start for EXPD86 ===== */
  17727. /**
  17728. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_Default
  17729. * \brief EXPD86 (Expand and Copy Byte 6 to 64bit)
  17730. * \details
  17731. * **Type**: DSP
  17732. *
  17733. * **Syntax**:\n
  17734. * ~~~
  17735. * EXPD86 Rd, Rs1
  17736. * ~~~
  17737. *
  17738. * **Purpose**:\n
  17739. * Only RV64, copy 8-bit data from 64-bit chunks into 8 bytes in a register.
  17740. *
  17741. * **Description**:\n
  17742. * Moves Rs1.B[6][7:0] to every byte of Rd, i.e. Rd.B[x][7:0] for x=7...0
  17743. *
  17744. * **Operations**:\n
  17745. * ~~~
  17746. * Rd.B[x][7:0] = Rs1.B[6][7:0];
  17747. * for RV64: x=7...0
  17748. * ~~~
  17749. *
  17750. * \param [in] a unsigned long type of value stored in a
  17751. * \return value stored in unsigned long type
  17752. */
  17753. __STATIC_FORCEINLINE unsigned long __RV_EXPD86(unsigned long a)
  17754. {
  17755. unsigned long result;
  17756. __ASM volatile("expd86 %0, %1" : "=r"(result) : "r"(a));
  17757. return result;
  17758. }
  17759. /* ===== Inline Function End for EXPD86 ===== */
  17760. /* ===== Inline Function Start for EXPD87 ===== */
  17761. /**
  17762. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_Default
  17763. * \brief EXPD87 (Expand and Copy Byte 7 to 64bit)
  17764. * \details
  17765. * **Type**: DSP
  17766. *
  17767. * **Syntax**:\n
  17768. * ~~~
  17769. * EXPD87 Rd, Rs1
  17770. * ~~~
  17771. *
  17772. * **Purpose**:\n
  17773. * Only RV64, copy 8-bit data from 64-bit chunks into 8 bytes in a register.
  17774. *
  17775. * **Description**:\n
  17776. * Moves Rs1.B[7][7:0] to every byte of Rd, i.e. Rd.B[x][7:0] for x=7...0
  17777. *
  17778. * **Operations**:\n
  17779. * ~~~
  17780. * Rd.B[x][7:0] = Rs1.B[7][7:0];
  17781. * for RV64: x=7...0
  17782. * ~~~
  17783. *
  17784. * \param [in] a unsigned long type of value stored in a
  17785. * \return value stored in unsigned long type
  17786. */
  17787. __STATIC_FORCEINLINE unsigned long __RV_EXPD87(unsigned long a)
  17788. {
  17789. unsigned long result;
  17790. __ASM volatile("expd87 %0, %1" : "=r"(result) : "r"(a));
  17791. return result;
  17792. }
  17793. /* ===== Inline Function End for EXPD87 ===== */
  17794. #endif /* __RISCV_XLEN == 64 */
  17795. #if (__RISCV_XLEN == 32) || defined(__ONLY_FOR_DOXYGEN_DOCUMENT_GENERATION__)
  17796. /* XXXXX Nuclei Extended DSP Instructions for RV32 XXXXX */
  17797. /**
  17798. * \defgroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N1 Nuclei N1 SIMD DSP Additional Instructions
  17799. * \ingroup NMSIS_Core_DSP_Intrinsic
  17800. * \brief (RV32 only)Nuclei Customized N1 DSP Instructions
  17801. * \details This is Nuclei customized DSP N1 instructions only for RV32
  17802. */
  17803. /**
  17804. * \defgroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 Nuclei N2 SIMD DSP Additional Instructions
  17805. * \ingroup NMSIS_Core_DSP_Intrinsic
  17806. * \brief (RV32 only)Nuclei Customized N2 DSP Instructions
  17807. * \details This is Nuclei customized DSP N2 instructions only for RV32
  17808. */
  17809. /**
  17810. * \defgroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 Nuclei N3 SIMD DSP Additional Instructions
  17811. * \ingroup NMSIS_Core_DSP_Intrinsic
  17812. * \brief (RV32 only)Nuclei Customized N3 DSP Instructions
  17813. * \details This is Nuclei customized DSP N3 instructions only for RV32
  17814. */
  17815. /* ===== Inline Function Start for DKHM8 ===== */
  17816. /**
  17817. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N1
  17818. * \brief DKHM8 (64-bit SIMD Signed Saturating Q7 Multiply)
  17819. * \details
  17820. * **Type**: SIMD
  17821. *
  17822. * **Syntax**:\n
  17823. * ~~~
  17824. * DKHM8 Rd, Rs1, Rs2
  17825. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  17826. * ~~~
  17827. *
  17828. * **Purpose**:\n
  17829. * Do Q7xQ7 element multiplications simultaneously. The Q14 results are then reduced to Q7
  17830. * numbers again.
  17831. *
  17832. * **Description**:\n
  17833. * For the `DKHM8` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1
  17834. * with the top 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7
  17835. * content of 16-bit chunks in Rs1 with the bottom 8-bit Q7 content of 16-bit chunks in Rs2.
  17836. *
  17837. * The Q14 results are then right-shifted 7-bits and saturated into Q7 values. The Q7 results are then
  17838. * written into Rd. When both the two Q7 inputs of a multiplication are 0x80, saturation will happen.
  17839. * The result will be saturated to 0x7F and the overflow flag OV will be set.
  17840. *
  17841. * **Operations**:\n
  17842. * ~~~
  17843. * op1t = Rs1.B[x+1]; op2t = Rs2.B[x+1]; // top
  17844. * op1b = Rs1.B[x]; op2b = Rs2.B[x]; // bottom
  17845. * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  17846. * if (0x80 != aop | 0x80 != bop) {
  17847. * res = (aop s* bop) >> 7;
  17848. * } else {
  17849. * res= 0x7F;
  17850. * OV = 1;
  17851. * }
  17852. * }
  17853. * Rd.H[x/2] = concat(rest, resb);
  17854. * for RV32, x=0,2,4,6
  17855. * ~~~
  17856. *
  17857. * \param [in] a unsigned long long type of value stored in a
  17858. * \param [in] b unsigned long long type of value stored in b
  17859. * \return value stored in unsigned long long type
  17860. */
  17861. __STATIC_FORCEINLINE unsigned long long __RV_DKHM8(unsigned long long a, unsigned long long b)
  17862. {
  17863. unsigned long long result;
  17864. __ASM volatile("dkhm8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  17865. return result;
  17866. }
  17867. /* ===== Inline Function End for DKHM8 ===== */
  17868. /* ===== Inline Function Start for DKHM16 ===== */
  17869. /**
  17870. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N1
  17871. * \brief DKHM16 (64-bit SIMD Signed Saturating Q15 Multiply)
  17872. * \details
  17873. * **Type**: SIMD
  17874. *
  17875. * **Syntax**:\n
  17876. * ~~~
  17877. * DKHM16 Rd, Rs1, Rs2
  17878. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  17879. * ~~~
  17880. *
  17881. * **Purpose**:\n
  17882. * Do Q15xQ15 element multiplications simultaneously. The Q30 results are then reduced to
  17883. * Q15 numbers again.
  17884. *
  17885. * **Description**:\n
  17886. * For the `DKHM16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in
  17887. * Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom
  17888. * 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content of 32-bit chunks in
  17889. * Rs2.
  17890. *
  17891. * The Q30 results are then right-shifted 15-bits and saturated into Q15 values. The Q15 results are
  17892. * then written into Rd. When both the two Q15 inputs of a multiplication are 0x8000, saturation will
  17893. * happen. The result will be saturated to 0x7FFF and the overflow flag OV will be set.
  17894. *
  17895. * **Operations**:\n
  17896. * ~~~
  17897. * op1t = Rs1.H[x+1]; op2t = Rs2.H[x+1]; // top
  17898. * op1b = Rs1.H[x]; op2b = Rs2.H[x]; // bottom
  17899. * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  17900. * if (0x8000 != aop | 0x8000 != bop) {
  17901. * res = (aop s* bop) >> 15;
  17902. * } else {
  17903. * res= 0x7FFF;
  17904. * OV = 1;
  17905. * }
  17906. * }
  17907. * Rd.W[x/2] = concat(rest, resb);
  17908. * for RV32: x=0, 2
  17909. * ~~~
  17910. *
  17911. * \param [in] a unsigned long long type of value stored in a
  17912. * \param [in] b unsigned long long type of value stored in b
  17913. * \return value stored in unsigned long long type
  17914. */
  17915. __STATIC_FORCEINLINE unsigned long long __RV_DKHM16(unsigned long long a, unsigned long long b)
  17916. {
  17917. unsigned long long result;
  17918. __ASM volatile("dkhm16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  17919. return result;
  17920. }
  17921. /* ===== Inline Function End for DKHM16 ===== */
  17922. /* ===== Inline Function Start for DKABS8 ===== */
  17923. /**
  17924. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N1
  17925. * \brief DKABS8 (64-bit SIMD 8-bit Saturating Absolute)
  17926. * \details
  17927. * **Type**: SIMD
  17928. *
  17929. * **Syntax**:\n
  17930. * ~~~
  17931. * DKABS8 Rd, Rs1
  17932. * # Rd, Rs1 are all even/odd pair of registers
  17933. * ~~~
  17934. *
  17935. * **Purpose**:\n
  17936. * Get the absolute value of 8-bit signed integer elements simultaneously.
  17937. *
  17938. * **Description**:\n
  17939. * This instruction calculates the absolute value of 8-bit signed integer elements stored
  17940. * in Rs1 and writes the element results to Rd. If the input number is 0x80, this instruction generates
  17941. * 0x7f as the output and sets the OV bit to 1.
  17942. *
  17943. * **Operations**:\n
  17944. * ~~~
  17945. * src = Rs1.B[x];
  17946. * if (src == 0x80) {
  17947. * src = 0x7f;
  17948. * OV = 1;
  17949. * } else if (src[7] == 1) {
  17950. * src = -src;
  17951. * }
  17952. * Rd.B[x] = src;
  17953. * for RV32: x=7...0,
  17954. * ~~~
  17955. *
  17956. * \param [in] a unsigned long long type of value stored in a
  17957. * \return value stored in unsigned long long type
  17958. */
  17959. __STATIC_FORCEINLINE unsigned long long __RV_DKABS8(unsigned long long a)
  17960. {
  17961. unsigned long long result;
  17962. __ASM volatile("dkabs8 %0, %1" : "=r"(result) : "r"(a));
  17963. return result;
  17964. }
  17965. /* ===== Inline Function End for DKABS8 ===== */
  17966. /* ===== Inline Function Start for DKABS16 ===== */
  17967. /**
  17968. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N1
  17969. * \brief DKABS16 (64-bit SIMD 16-bit Saturating Absolute)
  17970. * \details
  17971. * **Type**: SIMD
  17972. *
  17973. * **Syntax**:\n
  17974. * ~~~
  17975. * DKABS16 Rd, Rs1
  17976. * # Rd, Rs1 are all even/odd pair of registers
  17977. * ~~~
  17978. *
  17979. * **Purpose**:\n
  17980. * Get the absolute value of 16-bit signed integer elements simultaneously.
  17981. *
  17982. * **Description**:\n
  17983. * This instruction calculates the absolute value of 16-bit signed integer elements stored
  17984. * in Rs1 and writes the element results to Rd. If the input number is 0x8000, this instruction
  17985. * generates 0x7fff as the output and sets the OV bit to 1.
  17986. *
  17987. * **Operations**:\n
  17988. * ~~~
  17989. * src = Rs1.H[x];
  17990. * if (src == 0x8000) {
  17991. * src = 0x7fff;
  17992. * OV = 1;
  17993. * } else if (src[15] == 1) {
  17994. * src = -src;
  17995. * }
  17996. * Rd.H[x] = src;
  17997. * for RV32: x=3...0,
  17998. * ~~~
  17999. *
  18000. * \param [in] a unsigned long long type of value stored in a
  18001. * \return value stored in unsigned long long type
  18002. */
  18003. __STATIC_FORCEINLINE unsigned long long __RV_DKABS16(unsigned long long a)
  18004. {
  18005. unsigned long long result;
  18006. __ASM volatile("dkabs16 %0, %1" : "=r"(result) : "r"(a));
  18007. return result;
  18008. }
  18009. /* ===== Inline Function End for DKABS16 ===== */
  18010. /* ===== Inline Function Start for DKSLRA8 ===== */
  18011. /**
  18012. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N1
  18013. * \brief DKSLRA8 (64-bit SIMD 8-bit Shift Left Logical with Saturation or Shift Right Arithmetic)
  18014. * \details
  18015. * **Type**: SIMD
  18016. *
  18017. * **Syntax**:\n
  18018. * ~~~
  18019. * DKSLRA8 Rd, Rs1, Rs2
  18020. * # Rd, Rs1 are all even/odd pair of registers
  18021. * ~~~
  18022. *
  18023. * **Purpose**:\n
  18024. * Do 8-bit elements logical left (positive) or arithmetic right (negative) shift operation with
  18025. * Q7 saturation for the left shift.
  18026. *
  18027. * **Description**:\n
  18028. * The 8-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
  18029. * based on the value of Rs2[3:0]. Rs2[3:0] is in the signed range of [-2^3, 2^3-1]. A positive Rs2[3:0] means
  18030. * logical left shift and a negative Rs2[3:0] means arithmetic right shift. The shift amount is the
  18031. * absolute value of Rs2[3:0]. However, the behavior of `Rs2[3:0]==-2^3 (0x8)` is defined to be
  18032. * equivalent to the behavior of `Rs2[3:0]==-(2^3-1) (0x9)`.
  18033. * The left-shifted results are saturated to the 8-bit signed integer range of [-2^7, 2^7-1].
  18034. * If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:4] will not affect
  18035. * this instruction.
  18036. *
  18037. * **Operations**:\n
  18038. * ~~~
  18039. * if (Rs2[3:0] < 0) {
  18040. * sa = -Rs2[3:0];
  18041. * sa = (sa == 8)? 7 : sa;
  18042. * Rd.B[x] = SE8(Rs1.B[x][7:sa]);
  18043. * } else {
  18044. * sa = Rs2[2:0];
  18045. * res[(7+sa):0] = Rs1.B[x] <<(logic) sa;
  18046. * if (res > (2^7)-1) {
  18047. * res[7:0] = 0x7f; OV = 1;
  18048. * } else if (res < -2^7) {
  18049. * res[7:0] = 0x80; OV = 1;
  18050. * }
  18051. * Rd.B[x] = res[7:0];
  18052. * }
  18053. * for RV32: x=7...0,
  18054. * ~~~
  18055. *
  18056. * \param [in] a unsigned long long type of value stored in a
  18057. * \param [in] b int type of value stored in b
  18058. * \return value stored in unsigned long long type
  18059. */
  18060. __STATIC_FORCEINLINE unsigned long long __RV_DKSLRA8(unsigned long long a, int b)
  18061. {
  18062. unsigned long long result;
  18063. __ASM volatile("dkslra8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  18064. return result;
  18065. }
  18066. /* ===== Inline Function End for DKSLRA8 ===== */
  18067. /* ===== Inline Function Start for DKSLRA16 ===== */
  18068. /**
  18069. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N1
  18070. * \brief DKSLRA16 (64-bit SIMD 16-bit Shift Left Logical with Saturation or Shift Right Arithmetic)
  18071. * \details
  18072. * **Type**: SIMD
  18073. *
  18074. * **Syntax**:\n
  18075. * ~~~
  18076. * DKSLRA16 Rd, Rs1, Rs2
  18077. * # Rd, Rs1 are all even/odd pair of registers
  18078. * ~~~
  18079. *
  18080. * **Purpose**:\n
  18081. * Do 16-bit elements logical left (positive) or arithmetic right (negative) shift operation with
  18082. * Q15 saturation for the left shift.
  18083. *
  18084. * **Description**:\n
  18085. * The 16-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
  18086. * based on the value of Rs2[4:0]. Rs2[4:0] is in the signed range of [-2^4, 2^4-1]. A positive Rs2[4:0] means
  18087. * logical left shift and a negative Rs2[4:0] means arithmetic right shift. The shift amount is the
  18088. * absolute value of Rs2[4:0]. However, the behavior of `Rs2[4:0]==-2^4 (0x10)` is defined to be
  18089. * equivalent to the behavior of `Rs2[4:0]==-(2^4-1) (0x11)`.
  18090. * The left-shifted results are saturated to the 16-bit signed integer range of [-2^15, 2^15-1].
  18091. * After the shift, saturation, or rounding, the final results are written to
  18092. * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:5] will not affect
  18093. * this instruction.
  18094. *
  18095. * **Operations**:\n
  18096. * ~~~
  18097. * if (Rs2[4:0] < 0) {
  18098. * sa = -Rs2[4:0];
  18099. * sa = (sa == 16)? 15 : sa;
  18100. * Rd.H[x] = SE16(Rs1.H[x][15:sa]);
  18101. * } else {
  18102. * sa = Rs2[3:0];
  18103. * res[(15+sa):0] = Rs1.H[x] <<(logic) sa;
  18104. * if (res > (2^15)-1) {
  18105. * res[15:0] = 0x7fff; OV = 1;
  18106. * } else if (res < -2^15) {
  18107. * res[15:0] = 0x8000; OV = 1;
  18108. * }
  18109. * Rd.H[x] = res[15:0];
  18110. * }
  18111. * for RV32: x=3...0,
  18112. * ~~~
  18113. *
  18114. * \param [in] a unsigned long long type of value stored in a
  18115. * \param [in] b int type of value stored in b
  18116. * \return value stored in unsigned long long type
  18117. */
  18118. __STATIC_FORCEINLINE unsigned long long __RV_DKSLRA16(unsigned long long a, int b)
  18119. {
  18120. unsigned long long result;
  18121. __ASM volatile("dkslra16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  18122. return result;
  18123. }
  18124. /* ===== Inline Function End for DKSLRA16 ===== */
  18125. /* ===== Inline Function Start for DKADD8 ===== */
  18126. /**
  18127. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N1
  18128. * \brief DKADD8 (64-bit SIMD 8-bit Signed Saturating Addition)
  18129. * \details
  18130. * **Type**: SIMD
  18131. *
  18132. * **Syntax**:\n
  18133. * ~~~
  18134. * DKADD8 Rd, Rs1, Rs2
  18135. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  18136. * ~~~
  18137. *
  18138. * **Purpose**:\n
  18139. * Do 8-bit signed integer element saturating additions simultaneously.
  18140. *
  18141. * **Description**:\n
  18142. * This instruction adds the 8-bit signed integer elements in Rs1 with the 8-bit signed
  18143. * integer elements in Rs2. If any of the results are beyond the Q7 number range (-2^7 <= Q7 <= 2^7-1), they
  18144. * are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
  18145. *
  18146. * **Operations**:\n
  18147. * ~~~
  18148. * res[x] = Rs1.B[x] + Rs2.B[x];
  18149. * if (res[x] > 127) {
  18150. * res[x] = 127;
  18151. * OV = 1;
  18152. * } else if (res[x] < -128) {
  18153. * res[x] = -128;
  18154. * OV = 1;
  18155. * }
  18156. * Rd.B[x] = res[x];
  18157. * for RV32: x=7...0,
  18158. * ~~~
  18159. *
  18160. * \param [in] a unsigned long long type of value stored in a
  18161. * \param [in] b unsigned long long type of value stored in b
  18162. * \return value stored in unsigned long long type
  18163. */
  18164. __STATIC_FORCEINLINE unsigned long long __RV_DKADD8(unsigned long long a, unsigned long long b)
  18165. {
  18166. unsigned long long result;
  18167. __ASM volatile("dkadd8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  18168. return result;
  18169. }
  18170. /* ===== Inline Function End for DKADD8 ===== */
  18171. /* ===== Inline Function Start for DKADD16 ===== */
  18172. /**
  18173. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N1
  18174. * \brief DKADD16 (64-bit SIMD 16-bit Signed Saturating Addition)
  18175. * \details
  18176. * **Type**: SIMD
  18177. *
  18178. * **Syntax**:\n
  18179. * ~~~
  18180. * DKADD16 Rd, Rs1, Rs2
  18181. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  18182. * ~~~
  18183. *
  18184. * **Purpose**:\n
  18185. * Do 16-bit signed integer element saturating additions simultaneously.
  18186. *
  18187. * **Description**:\n
  18188. * This instruction adds the 16-bit signed integer elements in Rs1 with the 16-bit signed
  18189. * integer elements in Rs2. If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= 2^15-1),
  18190. * they are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
  18191. *
  18192. * **Operations**:\n
  18193. * ~~~
  18194. * res[x] = Rs1.H[x] + Rs2.H[x];
  18195. * if (res[x] > 32767) {
  18196. * res[x] = 32767;
  18197. * OV = 1;
  18198. * } else if (res[x] < -32768) {
  18199. * res[x] = -32768;
  18200. * OV = 1;
  18201. * }
  18202. * Rd.H[x] = res[x];
  18203. * for RV32: x=3...0,
  18204. * ~~~
  18205. *
  18206. * \param [in] a unsigned long long type of value stored in a
  18207. * \param [in] b unsigned long long type of value stored in b
  18208. * \return value stored in unsigned long long type
  18209. */
  18210. __STATIC_FORCEINLINE unsigned long long __RV_DKADD16(unsigned long long a, unsigned long long b)
  18211. {
  18212. unsigned long long result;
  18213. __ASM volatile("dkadd16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  18214. return result;
  18215. }
  18216. /* ===== Inline Function End for DKADD16 ===== */
  18217. /* ===== Inline Function Start for DKSUB8 ===== */
  18218. /**
  18219. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N1
  18220. * \brief DKSUB8 (64-bit SIMD 8-bit Signed Saturating Subtraction)
  18221. * \details
  18222. * **Type**: SIMD
  18223. *
  18224. * **Syntax**:\n
  18225. * ~~~
  18226. * DKSUB8 Rd, Rs1, Rs2
  18227. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  18228. * ~~~
  18229. *
  18230. * **Purpose**:\n
  18231. * Do 8-bit signed elements saturating subtractions simultaneously.
  18232. *
  18233. * **Description**:\n
  18234. * This instruction subtracts the 8-bit signed integer elements in Rs2 from the 8-bit
  18235. * signed integer elements in Rs1. If any of the results are beyond the Q7 number range (-2^7 <= Q7 <= 2^7-1),
  18236. * they are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
  18237. *
  18238. * **Operations**:\n
  18239. * ~~~
  18240. * res[x] = Rs1.B[x] - Rs2.B[x];
  18241. * if (res[x] > (2^7)-1) {
  18242. * res[x] = (2^7)-1;
  18243. * OV = 1;
  18244. * } else if (res[x] < -2^7) {
  18245. * res[x] = -2^7;
  18246. * OV = 1;
  18247. * }
  18248. * Rd.B[x] = res[x];
  18249. * for RV32: x=7...0,
  18250. * ~~~
  18251. *
  18252. * \param [in] a unsigned long long type of value stored in a
  18253. * \param [in] b unsigned long long type of value stored in b
  18254. * \return value stored in unsigned long long type
  18255. */
  18256. __STATIC_FORCEINLINE unsigned long long __RV_DKSUB8(unsigned long long a, unsigned long long b)
  18257. {
  18258. unsigned long long result;
  18259. __ASM volatile("dksub8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  18260. return result;
  18261. }
  18262. /* ===== Inline Function End for DKSUB8 ===== */
  18263. /* ===== Inline Function Start for DKSUB16 ===== */
  18264. /**
  18265. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N1
  18266. * \brief DKSUB16 (64-bit SIMD 16-bit Signed Saturating Subtraction)
  18267. * \details
  18268. * **Type**: SIMD
  18269. *
  18270. * **Syntax**:\n
  18271. * ~~~
  18272. * DKSUB16 Rd, Rs1, Rs2
  18273. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  18274. * ~~~
  18275. *
  18276. * **Purpose**:\n
  18277. * Do 16-bit signed integer elements saturating subtractions simultaneously.
  18278. *
  18279. * **Description**:\n
  18280. * This instruction subtracts the 16-bit signed integer elements in Rs2 from the 16-bit
  18281. * signed integer elements in Rs1. If any of the results are beyond the Q15 number range (-2^15 <= Q15 <=
  18282. * 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to
  18283. * Rd.
  18284. *
  18285. * **Operations**:\n
  18286. * ~~~
  18287. * res[x] = Rs1.H[x] - Rs2.H[x];
  18288. * if (res[x] > (2^15)-1) {
  18289. * res[x] = (2^15)-1;
  18290. * OV = 1;
  18291. * } else if (res[x] < -2^15) {
  18292. * res[x] = -2^15;
  18293. * OV = 1;
  18294. * }
  18295. * Rd.H[x] = res[x];
  18296. * for RV32: x=3...0,
  18297. * ~~~
  18298. *
  18299. * \param [in] a unsigned long long type of value stored in a
  18300. * \param [in] b unsigned long long type of value stored in b
  18301. * \return value stored in unsigned long long type
  18302. */
  18303. __STATIC_FORCEINLINE unsigned long long __RV_DKSUB16(unsigned long long a, unsigned long long b)
  18304. {
  18305. unsigned long long result;
  18306. __ASM volatile("dksub16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  18307. return result;
  18308. }
  18309. /* ===== Inline Function End for DKSUB16 ===== */
  18310. /* ===== Inline Function Start for DKHMX8 ===== */
  18311. /**
  18312. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  18313. * \brief DKHMX8 (64-bit SIMD Signed Crossed Saturating Q7 Multiply)
  18314. * \details
  18315. * **Type**: SIMD
  18316. *
  18317. * **Syntax**:\n
  18318. * ~~~
  18319. * DKHMX8 Rd, Rs1, Rs2
  18320. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  18321. * ~~~
  18322. *
  18323. * **Purpose**:\n
  18324. * Do Q7xQ7 element crossed multiplications simultaneously. The Q14 results are then reduced to Q7 numbers again.
  18325. *
  18326. * **Description**:\n
  18327. * For the `DKHMX8` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1 with the
  18328. * bottom 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7
  18329. * content of 16-bit chunks in Rs1 with the top 8-bit Q7 content of 16-bit chunks in Rs2.
  18330. *
  18331. * The Q14 results are then right-shifted 7-bits and saturated into Q7 values. The Q7 results are then
  18332. * written into Rd. When both the two Q7 inputs of a multiplication are 0x80, saturation will happen.
  18333. * The result will be saturated to 0x7F and the overflow flag OV will be set.
  18334. *
  18335. * **Operations**:\n
  18336. * ~~~
  18337. * op1t = Rs1.B[x+1]; op2t = Rs2.B[x]; // top
  18338. * op1b = Rs1.B[x]; op2b = Rs2.B[x+1]; // bottom
  18339. * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  18340. * if (0x80 != aop | 0x80 != bop) {
  18341. * res = (aop s* bop) >> 7;
  18342. * } else {
  18343. * res= 0x7F;
  18344. * OV = 1;
  18345. * }
  18346. * }
  18347. * Rd.H[x/2] = concat(rest, resb);
  18348. * for RV32, x=0,2,4,6
  18349. * ~~~
  18350. *
  18351. * \param [in] a unsigned long long type of value stored in a
  18352. * \param [in] b unsigned long long type of value stored in b
  18353. * \return value stored in unsigned long long type
  18354. */
  18355. __STATIC_FORCEINLINE unsigned long long __RV_DKHMX8(unsigned long long a, unsigned long long b)
  18356. {
  18357. unsigned long long result;
  18358. __ASM volatile("dkhmx8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  18359. return result;
  18360. }
  18361. /* ===== Inline Function End for DKHMX8 ===== */
  18362. /* ===== Inline Function Start for DKHMX16 ===== */
  18363. /**
  18364. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  18365. * \brief DKHMX16 (64-bit SIMD Signed Crossed Saturating Q15 Multiply)
  18366. * \details
  18367. * **Type**: SIMD
  18368. *
  18369. * **Syntax**:\n
  18370. * ~~~
  18371. * DKHMX16 Rd, Rs1, Rs2
  18372. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  18373. * ~~~
  18374. *
  18375. * **Purpose**:\n
  18376. * Do Q15xQ15 element crossed multiplications simultaneously. The Q30 results are then reduced to Q15 numbers again.
  18377. *
  18378. * **Description**:\n
  18379. * For the `DKHMX16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the
  18380. * bottom 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom 16-bit Q15
  18381. * content of 32-bit chunks in Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2.
  18382. *
  18383. * The Q30 results are then right-shifted 15-bits and saturated into Q15 values. The Q15 results are
  18384. * then written into Rd. When both the two Q15 inputs of a multiplication are 0x8000, saturation will
  18385. * happen. The result will be saturated to 0x7FFF and the overflow flag OV will be set.
  18386. *
  18387. * **Operations**:\n
  18388. * ~~~
  18389. * op1t = Rs1.H[x+1]; op2t = Rs2.H[x]; // top
  18390. * op1b = Rs1.H[x]; op2b = Rs2.H[x+1]; // bottom
  18391. * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  18392. * if (0x8000 != aop | 0x8000 != bop) {
  18393. * res = (aop s* bop) >> 15;
  18394. * } else {
  18395. * res= 0x7FFF;
  18396. * OV = 1;
  18397. * }
  18398. * }
  18399. * Rd.W[x/2] = concat(rest, resb);
  18400. * for RV32, x=0,2
  18401. * ~~~
  18402. *
  18403. * \param [in] a unsigned long long type of value stored in a
  18404. * \param [in] b unsigned long long type of value stored in b
  18405. * \return value stored in unsigned long long type
  18406. */
  18407. __STATIC_FORCEINLINE unsigned long long __RV_DKHMX16(unsigned long long a, unsigned long long b)
  18408. {
  18409. unsigned long long result;
  18410. __ASM volatile("dkhmx16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  18411. return result;
  18412. }
  18413. /* ===== Inline Function End for DKHMX16 ===== */
  18414. /* ===== Inline Function Start for DSMMUL ===== */
  18415. /**
  18416. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  18417. * \brief DSMMUL (64-bit MSW 32x32 Signed Multiply)
  18418. * \details
  18419. * **Type**: SIMD
  18420. *
  18421. * **Syntax**:\n
  18422. * ~~~
  18423. * DSMMUL Rd, Rs1, Rs2
  18424. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  18425. * ~~~
  18426. *
  18427. * **Purpose**:\n
  18428. * Do MSW 32x32 element signed multiplications simultaneously. The results are written into Rd.
  18429. *
  18430. * **Description**:\n
  18431. * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2 and writes the
  18432. * most significant 32-bit multiplication results to the corresponding 32-bit elements of Rd. The 32-bit
  18433. * elements of Rs1 and Rs2 are treated as signed integers. The .u form of the instruction rounds up
  18434. * the most significant 32-bit of the 64-bit multiplication results by adding a 1 to bit 31 of the results.
  18435. *
  18436. * **Operations**:\n
  18437. * ~~~
  18438. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  18439. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  18440. * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  18441. * res = (aop s* bop)[63:32];
  18442. * }
  18443. * Rd = concat(rest, resb);
  18444. * x=0
  18445. * ~~~
  18446. *
  18447. * \param [in] a unsigned long long type of value stored in a
  18448. * \param [in] b unsigned long long type of value stored in b
  18449. * \return value stored in unsigned long long type
  18450. */
  18451. __STATIC_FORCEINLINE unsigned long long __RV_DSMMUL(unsigned long long a, unsigned long long b)
  18452. {
  18453. unsigned long long result;
  18454. __ASM volatile("dsmmul %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  18455. return result;
  18456. }
  18457. /* ===== Inline Function End for DSMMUL ===== */
  18458. /* ===== Inline Function Start for DSMMUL.u ===== */
  18459. /**
  18460. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  18461. * \brief DSMMUL.u (64-bit MSW 32x32 Unsigned Multiply)
  18462. * \details
  18463. * **Type**: SIMD
  18464. *
  18465. * **Syntax**:\n
  18466. * ~~~
  18467. * DSMMUL.u Rd, Rs1, Rs2
  18468. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  18469. * ~~~
  18470. *
  18471. * **Purpose**:\n
  18472. * Do MSW 32x32 element unsigned multiplications simultaneously. The results are written into Rd.
  18473. *
  18474. * **Description**:\n
  18475. * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2 and writes the
  18476. * most significant 32-bit multiplication results to the corresponding 32-bit elements of Rd. The 32-bit
  18477. * elements of Rs1 and Rs2 are treated as unsigned integers. The .u form of the instruction rounds up
  18478. * the most significant 32-bit of the 64-bit multiplication results by adding a 1 to bit 31 of the results.
  18479. *
  18480. * **Operations**:\n
  18481. * ~~~
  18482. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  18483. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  18484. * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  18485. * res = RUND(aop u* bop)[63:32];
  18486. * }
  18487. * Rd = concat(rest, resb);
  18488. * x=0
  18489. * ~~~
  18490. *
  18491. * \param [in] a unsigned long long type of value stored in a
  18492. * \param [in] b unsigned long long type of value stored in b
  18493. * \return value stored in unsigned long long type
  18494. */
  18495. __STATIC_FORCEINLINE unsigned long long __RV_DSMMUL_U(unsigned long long a, unsigned long long b)
  18496. {
  18497. unsigned long long result;
  18498. __ASM volatile("dsmmul.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  18499. return result;
  18500. }
  18501. /* ===== Inline Function End for DSMMUL.u ===== */
  18502. /* ===== Inline Function Start for DKWMMUL ===== */
  18503. /**
  18504. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  18505. * \brief DKWMMUL (64-bit MSW 32x32 Signed Multiply & Double)
  18506. * \details
  18507. * **Type**: SIMD
  18508. *
  18509. * **Syntax**:\n
  18510. * ~~~
  18511. * DKWMMUL Rd, Rs1, Rs2
  18512. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  18513. * ~~~
  18514. *
  18515. * **Purpose**:\n
  18516. * Do MSW 32x32 element signed multiplications simultaneously and double. The results are written into Rd.
  18517. *
  18518. * **Description**:\n
  18519. * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2. It then shifts
  18520. * the multiplication results one bit to the left and takes the most significant 32-bit results. If the
  18521. * shifted result is greater than 2^31-1, it is saturated to 2^31-1 and the OV flag is set to 1. The final element
  18522. * result is written to Rd. The 32-bit elements of Rs1 and Rs2 are treated as signed integers. The .u
  18523. * form of the instruction additionally rounds up the 64-bit multiplication results by adding a 1 to bit
  18524. * 30 before the shift and saturation operations.
  18525. *
  18526. * **Operations**:\n
  18527. * ~~~
  18528. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  18529. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  18530. * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  18531. * res = sat.q31((aop s* bop) << 1)[63:32];
  18532. * }
  18533. * Rd = concat(rest, resb);
  18534. * x=0
  18535. * ~~~
  18536. *
  18537. * \param [in] a unsigned long long type of value stored in a
  18538. * \param [in] b unsigned long long type of value stored in b
  18539. * \return value stored in unsigned long long type
  18540. */
  18541. __STATIC_FORCEINLINE unsigned long long __RV_DKWMMUL(unsigned long long a, unsigned long long b)
  18542. {
  18543. unsigned long long result;
  18544. __ASM volatile("dkwmmul %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  18545. return result;
  18546. }
  18547. /* ===== Inline Function End for DKWMMUL ===== */
  18548. /* ===== Inline Function Start for DKWMMUL.u ===== */
  18549. /**
  18550. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  18551. * \brief DKWMMUL.u (64-bit MSW 32x32 Unsigned Multiply & Double)
  18552. * \details
  18553. * **Type**: SIMD
  18554. *
  18555. * **Syntax**:\n
  18556. * ~~~
  18557. * DKWMMUL.u Rd, Rs1, Rs2
  18558. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  18559. * ~~~
  18560. *
  18561. * **Purpose**:\n
  18562. * Do MSW 32x32 element unsigned multiplications simultaneously and double. The results are written into Rd.
  18563. *
  18564. * **Description**:\n
  18565. * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2. It then shifts
  18566. * the multiplication results one bit to the left and takes the most significant 32-bit results. If the
  18567. * shifted result is greater than 2^31-1, it is saturated to 2^31-1 and the OV flag is set to 1. The final element
  18568. * result is written to Rd. The 32-bit elements of Rs1 and Rs2 are treated as signed integers. The .u
  18569. * form of the instruction additionally rounds up the 64-bit multiplication results by adding a 1 to bit
  18570. * 30 before the shift and saturation operations.
  18571. *
  18572. * **Operations**:\n
  18573. * ~~~
  18574. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  18575. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  18576. * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  18577. * res = sat.q31(RUND(aop u* bop) << 1)[63:32];
  18578. * }
  18579. * Rd = concat(rest, resb);
  18580. * x=0
  18581. * ~~~
  18582. *
  18583. * \param [in] a unsigned long long type of value stored in a
  18584. * \param [in] b unsigned long long type of value stored in b
  18585. * \return value stored in unsigned long long type
  18586. */
  18587. __STATIC_FORCEINLINE unsigned long long __RV_DKWMMUL_U(unsigned long long a, unsigned long long b)
  18588. {
  18589. unsigned long long result;
  18590. __ASM volatile("dkwmmul.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  18591. return result;
  18592. }
  18593. /* ===== Inline Function End for DKWMMUL.u ===== */
  18594. /* ===== Inline Function Start for DKABS32 ===== */
  18595. /**
  18596. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  18597. * \brief DKABS32 (64-bit SIMD 32-bit Saturating Absolute)
  18598. * \details
  18599. * **Type**: SIMD
  18600. *
  18601. * **Syntax**:\n
  18602. * ~~~
  18603. * DKABS32 Rd, Rs1
  18604. * # Rd, Rs1 are all even/odd pair of registers
  18605. * ~~~
  18606. *
  18607. * **Purpose**:\n
  18608. * Get the absolute value of 32-bit signed integer elements simultaneously.
  18609. *
  18610. * **Description**:\n
  18611. * This instruction calculates the absolute value of 32-bit signed integer elements stored in Rs1 and writes the element
  18612. * results to Rd. If the input number is 0x8000_0000, this instruction generates 0x7fff_ffff as the output and sets the OV
  18613. * bit to 1.
  18614. *
  18615. * **Operations**:\n
  18616. * ~~~
  18617. * src = Rs1.W[x];
  18618. * if (src == 0x8000_0000) {
  18619. * src = 0x7fff_ffff;
  18620. * OV = 1;
  18621. * } else if (src[31] == 1) {
  18622. * src = -src;
  18623. * }
  18624. * Rd.W[x] = src;
  18625. * x=1...0
  18626. * ~~~
  18627. *
  18628. * \param [in] a unsigned long long type of value stored in a
  18629. * \return value stored in unsigned long long type
  18630. */
  18631. __STATIC_FORCEINLINE unsigned long long __RV_DKABS32(unsigned long long a)
  18632. {
  18633. unsigned long long result;
  18634. __ASM volatile("dkabs32 %0, %1" : "=r"(result) : "r"(a));
  18635. return result;
  18636. }
  18637. /* ===== Inline Function End for DKABS32 ===== */
  18638. /* ===== Inline Function Start for DKSLRA32 ===== */
  18639. /**
  18640. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  18641. * \brief DKSLRA32 (64-bit SIMD 32-bit Shift Left Logical with Saturation or Shift Right Arithmetic)
  18642. * \details
  18643. * **Type**: SIMD
  18644. *
  18645. * **Syntax**:\n
  18646. * ~~~
  18647. * DKSLRA32 Rd, Rs1, Rs2
  18648. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  18649. * ~~~
  18650. *
  18651. * **Purpose**:\n
  18652. * Do 32-bit elements logical left (positive) or arithmetic right (negative) shift operation with Q31 saturation for the left shift.
  18653. *
  18654. * **Description**:\n
  18655. * The 32-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically based on the value of Rs2[5:0].
  18656. * Rs2[5:0] is in the signed range of [-2^5, 2^5-1]. A positive Rs2[5:0] means logical left shift and a negative Rs2[5:0]
  18657. * means arithmetic right shift. The shift amount is the absolute value of Rs2[5:0]. However, the behavior of Rs2[5:0]==-
  18658. * 2^5 (0x20) is defined to be equivalent to the behavior of Rs2[5:0]==-(2^5-1) (0x21).
  18659. *
  18660. * **Operations**:\n
  18661. * ~~~
  18662. * if (Rs2[5:0] < 0) {
  18663. * sa = -Rs2[5:0];
  18664. * sa = (sa == 32)? 31 : sa;
  18665. * Rd.W[x] = SE32(Rs1.W[x][31:sa]);
  18666. * } else {
  18667. * sa = Rs2[4:0];
  18668. * res[(31+sa):0] = Rs1.W[x] <<(logic) sa;
  18669. * if (res > (2^31)-1) {
  18670. * res[31:0] = 0x7fff_ffff; OV = 1;
  18671. * } else if (res < -2^31) {
  18672. * res[31:0] = 0x8000_0000; OV = 1;
  18673. * }
  18674. * Rd.W[x] = res[31:0];
  18675. * }
  18676. * x=1...0
  18677. * ~~~
  18678. *
  18679. * \param [in] a unsigned long long type of value stored in a
  18680. * \param [in] b int type of value stored in b
  18681. * \return value stored in unsigned long long type
  18682. */
  18683. __STATIC_FORCEINLINE unsigned long long __RV_DKSLRA32(unsigned long long a, int b)
  18684. {
  18685. unsigned long long result;
  18686. __ASM volatile("dkslra32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  18687. return result;
  18688. }
  18689. /* ===== Inline Function End for DKSLRA32 ===== */
  18690. /* ===== Inline Function Start for DKADD32 ===== */
  18691. /**
  18692. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  18693. * \brief DKADD32 (64-bit SIMD 32-bit Signed Saturating Addition)
  18694. * \details
  18695. * **Type**: SIMD
  18696. *
  18697. * **Syntax**:\n
  18698. * ~~~
  18699. * DKADD32 Rd, Rs1, Rs2
  18700. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  18701. * ~~~
  18702. *
  18703. * **Purpose**:\n
  18704. * Do 32-bit signed integer element saturating additions simultaneously.
  18705. *
  18706. * **Description**:\n
  18707. * This instruction adds the 32-bit signed integer elements in Rs1 with the 32-bit signed integer elements in Rs2. If any
  18708. * of the results are beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), they are saturated to the range and the OV
  18709. * bit is set to 1. The saturated results are written to Rd.
  18710. *
  18711. * **Operations**:\n
  18712. * ~~~
  18713. * res[x] = Rs1.W[x] + Rs2.W[x];
  18714. * if (res[x] > 0x7fff_ffff) {
  18715. * res[x] = 0x7fff_ffff;
  18716. * OV = 1;
  18717. * } else if (res[x] < 0x8000_0000) {
  18718. * res[x] = 0x8000_0000;
  18719. * OV = 1;
  18720. * }
  18721. * Rd.W[x] = res[x];
  18722. * x=1...0
  18723. * ~~~
  18724. *
  18725. * \param [in] a unsigned long long type of value stored in a
  18726. * \param [in] b unsigned long long type of value stored in b
  18727. * \return value stored in unsigned long long type
  18728. */
  18729. __STATIC_FORCEINLINE unsigned long long __RV_DKADD32(unsigned long long a, unsigned long long b)
  18730. {
  18731. unsigned long long result;
  18732. __ASM volatile("dkadd32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  18733. return result;
  18734. }
  18735. /* ===== Inline Function End for DKADD32 ===== */
  18736. /* ===== Inline Function Start for DKSUB32 ===== */
  18737. /**
  18738. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  18739. * \brief DKSUB32 (64-bit SIMD 32-bit Signed Saturating Subtraction)
  18740. * \details
  18741. * **Type**: SIMD
  18742. *
  18743. * **Syntax**:\n
  18744. * ~~~
  18745. * DKSUB32 Rd, Rs1, Rs2
  18746. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  18747. * ~~~
  18748. *
  18749. * **Purpose**:\n
  18750. * Do 32-bit signed integer element saturating subtractions simultaneously.
  18751. *
  18752. * **Description**:\n
  18753. * This instruction subtracts the 32-bit signed integer elements in Rs2 from the 32-bit signed integer elements in Rs1. If
  18754. * any of the results are beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), they are saturated to the range and the
  18755. * OV bit is set to 1. The saturated results are written to Rd.
  18756. *
  18757. * **Operations**:\n
  18758. * ~~~
  18759. * res[x] = Rs1.W[x] - Rs2.W[x];
  18760. * if (res[x] > (2^31)-1) {
  18761. * res[x] = (2^31)-1;
  18762. * OV = 1;
  18763. * } else if (res[x] < -2^31) {
  18764. * res[x] = -2^31;
  18765. * OV = 1;
  18766. * }
  18767. * Rd.W[x] = res[x];
  18768. * x=1...0
  18769. * ~~~
  18770. *
  18771. * \param [in] a unsigned long long type of value stored in a
  18772. * \param [in] b unsigned long long type of value stored in b
  18773. * \return value stored in unsigned long long type
  18774. */
  18775. __STATIC_FORCEINLINE unsigned long long __RV_DKSUB32(unsigned long long a, unsigned long long b)
  18776. {
  18777. unsigned long long result;
  18778. __ASM volatile("dksub32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  18779. return result;
  18780. }
  18781. /* ===== Inline Function End for DKSUB32 ===== */
  18782. /* ===== Inline Function Start for DRADD16 ===== */
  18783. /**
  18784. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  18785. * \brief DRADD16 (64-bit SIMD 16-bit Halving Signed Addition)
  18786. * \details
  18787. * **Type**: SIMD
  18788. *
  18789. * **Syntax**:\n
  18790. * ~~~
  18791. * DRADD16 Rd, Rs1, Rs2
  18792. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  18793. * ~~~
  18794. *
  18795. * **Purpose**:\n
  18796. * Do 16-bit signed integer element additions simultaneously. The results are halved to avoid overflow or saturation.
  18797. *
  18798. * **Description**:\n
  18799. * This instruction adds the 16-bit signed integer elements in Rs1 with the 16-bit signed integer elements in Rs2. The results
  18800. * are first arithmetically right-shifted by 1 bit and then written to Rd.
  18801. *
  18802. * **Operations**:\n
  18803. * ~~~
  18804. * Rd.H[x] = [(Rs1.H[x]) + (Rs2.H[x])] s>> 1;
  18805. * x=3...0
  18806. * ~~~
  18807. *
  18808. * \param [in] a unsigned long long type of value stored in a
  18809. * \param [in] b unsigned long long type of value stored in b
  18810. * \return value stored in unsigned long long type
  18811. */
  18812. __STATIC_FORCEINLINE unsigned long long __RV_DRADD16(unsigned long long a, unsigned long long b)
  18813. {
  18814. unsigned long long result;
  18815. __ASM volatile("dradd16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  18816. return result;
  18817. }
  18818. /* ===== Inline Function End for DRADD16 ===== */
  18819. /* ===== Inline Function Start for DSUB16 ===== */
  18820. /**
  18821. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  18822. * \brief DSUB16 (64-bit SIMD 16-bit Subtraction)
  18823. * \details
  18824. * **Type**: SIMD
  18825. *
  18826. * **Syntax**:\n
  18827. * ~~~
  18828. * DSUB16 Rd, Rs1, Rs2
  18829. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  18830. * ~~~
  18831. *
  18832. * **Purpose**:\n
  18833. * Do 16-bit integer element subtractions simultaneously.
  18834. *
  18835. * **Description**:\n
  18836. * This instruction subtracts the 16-bit signed integer elements in Rs2 from the 16-bit signed integer elements in Rs1. The
  18837. * results are written to Rd.
  18838. *
  18839. * **Operations**:\n
  18840. * ~~~
  18841. * Rd.H[x] = [(Rs1.H[x]) - (Rs2.H[x])] ;
  18842. * x=3...0
  18843. * ~~~
  18844. *
  18845. * \param [in] a unsigned long long type of value stored in a
  18846. * \param [in] b unsigned long long type of value stored in b
  18847. * \return value stored in unsigned long long type
  18848. */
  18849. __STATIC_FORCEINLINE unsigned long long __RV_DSUB16(unsigned long long a, unsigned long long b)
  18850. {
  18851. unsigned long long result;
  18852. __ASM volatile("dsub16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  18853. return result;
  18854. }
  18855. /* ===== Inline Function End for DSUB16 ===== */
  18856. /* ===== Inline Function Start for DRADD32 ===== */
  18857. /**
  18858. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  18859. * \brief DRADD32 (64-bit SIMD 32-bit Halving Signed Addition)
  18860. * \details
  18861. * **Type**: SIMD
  18862. *
  18863. * **Syntax**:\n
  18864. * ~~~
  18865. * DRADD32 Rd, Rs1, Rs2
  18866. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  18867. * ~~~
  18868. *
  18869. * **Purpose**:\n
  18870. * Do 32-bit signed integer element additions simultaneously. The results are halved to avoid overflow or saturation.
  18871. *
  18872. * **Description**:\n
  18873. * This instruction adds the 32-bit signed integer elements in Rs1 with the 32-bit signed integer elements in Rs2. The results
  18874. * are first arithmetically right-shifted by 1 bit and then written to Rd.
  18875. *
  18876. * **Operations**:\n
  18877. * ~~~
  18878. * Rd.W[x] = [(Rs1.W[x]) + (Rs2.W[x])] s>> 1;
  18879. * x=1...0
  18880. * ~~~
  18881. *
  18882. * \param [in] a unsigned long long type of value stored in a
  18883. * \param [in] b unsigned long long type of value stored in b
  18884. * \return value stored in unsigned long long type
  18885. */
  18886. __STATIC_FORCEINLINE unsigned long long __RV_DRADD32(unsigned long long a, unsigned long long b)
  18887. {
  18888. unsigned long long result;
  18889. __ASM volatile("dradd32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  18890. return result;
  18891. }
  18892. /* ===== Inline Function End for DRADD32 ===== */
  18893. /* ===== Inline Function Start for DSUB32 ===== */
  18894. /**
  18895. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  18896. * \brief DSUB32 (64-bit SIMD 32-bit Subtraction)
  18897. * \details
  18898. * **Type**: SIMD
  18899. *
  18900. * **Syntax**:\n
  18901. * ~~~
  18902. * DSUB32 Rd, Rs1, Rs2
  18903. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  18904. * ~~~
  18905. *
  18906. * **Purpose**:\n
  18907. * Do 32-bit integer element subtractions simultaneously.
  18908. *
  18909. * **Description**:\n
  18910. * This instruction subtracts the 32-bit signed integer elements in Rs2 from the 32-bit signed integer elements in Rs1. The
  18911. * results are written to Rd.
  18912. *
  18913. * **Operations**:\n
  18914. * ~~~
  18915. * Rd.W[x] = [(Rs1.W[x]) - (Rs2.W[x])] ;
  18916. * x=1...0
  18917. * ~~~
  18918. *
  18919. * \param [in] a unsigned long long type of value stored in a
  18920. * \param [in] b unsigned long long type of value stored in b
  18921. * \return value stored in unsigned long long type
  18922. */
  18923. __STATIC_FORCEINLINE unsigned long long __RV_DSUB32(unsigned long long a, unsigned long long b)
  18924. {
  18925. unsigned long long result;
  18926. __ASM volatile("dsub32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  18927. return result;
  18928. }
  18929. /* ===== Inline Function End for DSUB32 ===== */
  18930. /* ===== Inline Function Start for DMSR16 ===== */
  18931. /**
  18932. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  18933. * \brief DMSR16 (Signed Multiply Halfs with Right Shift 16-bit and Cross Multiply Halfs with Right Shift 16-bit)
  18934. * \details
  18935. * **Type**: SIMD
  18936. *
  18937. * **Syntax**:\n
  18938. * ~~~
  18939. * DMSR16 Rd, Rs1, Rs2
  18940. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  18941. * ~~~
  18942. *
  18943. * **Purpose**:\n
  18944. * Do two signed 16-bit multiplications and cross multiplications from the 16-bit elements of two registers; each multiplication performs a right shift operation.
  18945. *
  18946. * **Description**:\n
  18947. * For the `DMSR16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the top 16-bit Q15 content
  18948. * of 32-bit chunks in Rs2, multiply the bottom 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content
  18949. * of 32-bit chunks in Rs2.
  18950. * At the same time, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content of 32-bit
  18951. * chunks in Rs2 and multiply the bottom 16-bit Q15 content of 32-bit chunks in Rs1 with the top 16-bit Q15 content of 32-bit
  18952. * chunks in Rs2. The Q31 results are then right-shifted 16-bits and clipped to Q15 values. The Q15 results are then written
  18953. * into Rd.
  18954. *
  18955. * **Operations**:\n
  18956. * ~~~
  18957. * Rd.H[0] = (Rs1.H[0] s* Rs2.H[0]) s>> 16
  18958. * Rd.H[1] = (Rs1.H[1] s* Rs2.H[1]) s>> 16
  18959. * Rd.H[2] = (Rs1.H[1] s* Rs2.H[0]) s>> 16
  18960. * Rd.H[3] = (Rs1.H[0] s* Rs2.H[1]) s>> 16
  18961. * ~~~
  18962. *
  18963. * \param [in] a unsigned long type of value stored in a
  18964. * \param [in] b unsigned long type of value stored in b
  18965. * \return value stored in unsigned long long type
  18966. */
  18967. __STATIC_FORCEINLINE unsigned long long __RV_DMSR16(unsigned long a, unsigned long b)
  18968. {
  18969. unsigned long long result;
  18970. __ASM volatile("dmsr16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  18971. return result;
  18972. }
  18973. /* ===== Inline Function End for DMSR16 ===== */
  18974. /* ===== Inline Function Start for DMSR17 ===== */
  18975. /**
  18976. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  18977. * \brief DMSR17 (Signed Multiply Halfs with Right Shift 17-bit and Cross Multiply Halfs with Right Shift 17-bit)
  18978. * \details
  18979. * **Type**: SIMD
  18980. *
  18981. * **Syntax**:\n
  18982. * ~~~
  18983. * DMSR17 Rd, Rs1, Rs2
  18984. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  18985. * ~~~
  18986. *
  18987. * **Purpose**:\n
  18988. * Do two signed 16-bit multiplications and cross multiplications from the 16-bit elements of two registers;
  18989. * and each multiplications performs a right shift operation.
  18990. *
  18991. * **Description**:\n
  18992. * For the `DMSR17` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the top 16-bit Q15 content
  18993. * of 32-bit chunks in Rs2, multiply the bottom 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content
  18994. * of 32-bit chunks in Rs2.
  18995. * At the same time, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content of 32-bit
  18996. * chunks in Rs2 and multiply the bottom 16-bit Q15 content of 32-bit chunks in Rs1 with the top 16-bit Q15 content of 32-bit
  18997. * chunks in Rs2. The Q31 results are then right-shifted 17-bits and clipped to Q15 values. The Q15 results are then written
  18998. * into Rd.
  18999. *
  19000. * **Operations**:\n
  19001. * ~~~
  19002. * Rd.H[0] = (Rs1.H[0] s* Rs2.H[0]) s>> 17
  19003. * Rd.H[1] = (Rs1.H[1] s* Rs2.H[1]) s>> 17
  19004. * Rd.H[2] = (Rs1.H[1] s* Rs2.H[0]) s>> 17
  19005. * Rd.H[3] = (Rs1.H[0] s* Rs2.H[1]) s>> 17
  19006. * ~~~
  19007. *
  19008. * \param [in] a unsigned long type of value stored in a
  19009. * \param [in] b unsigned long type of value stored in b
  19010. * \return value stored in unsigned long long type
  19011. */
  19012. __STATIC_FORCEINLINE unsigned long long __RV_DMSR17(unsigned long a, unsigned long b)
  19013. {
  19014. unsigned long long result;
  19015. __ASM volatile("dmsr17 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19016. return result;
  19017. }
  19018. /* ===== Inline Function End for DMSR17 ===== */
  19019. /* ===== Inline Function Start for DMSR33 ===== */
  19020. /**
  19021. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19022. * \brief DMSR33 (Signed Multiply with Right Shift 33-bit and Cross Multiply with Right Shift 33-bit)
  19023. * \details
  19024. * **Type**: SIMD
  19025. *
  19026. * **Syntax**:\n
  19027. * ~~~
  19028. * DMSR33 Rd, Rs1, Rs2
  19029. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  19030. * ~~~
  19031. *
  19032. * **Purpose**:\n
  19033. * Do two signed 32-bit multiplications from the 32-bit elements of two registers, and each multiplications performs a right
  19034. * shift operation.
  19035. *
  19036. * **Description**:\n
  19037. * For the `DMSR33` instruction, multiply the top 32-bit Q31 content of 64-bit chunks in Rs1 with the top 32-bit Q31 content
  19038. * of 64-bit chunks in Rs2. At the same time, multiply the bottom 32-bit Q31 content of 64-bit chunks in Rs1 with the bottom
  19039. * 32-bit Q31 content of 64-bit chunks in Rs2.
  19040. * The Q64 results are then right-shifted 33-bits and clipped to Q31 values. The Q31 results are then written into Rd.
  19041. *
  19042. * **Operations**:\n
  19043. * ~~~
  19044. * Rd.W[0] = (Rs1.W[0] s* Rs2.W[0]) s>> 33
  19045. * Rd.W[1] = (Rs1.W[1] s* Rs2.W[1]) s>> 33
  19046. * ~~~
  19047. *
  19048. * \param [in] a unsigned long long type of value stored in a
  19049. * \param [in] b unsigned long long type of value stored in b
  19050. * \return value stored in unsigned long long type
  19051. */
  19052. __STATIC_FORCEINLINE unsigned long long __RV_DMSR33(unsigned long long a, unsigned long long b)
  19053. {
  19054. unsigned long long result;
  19055. __ASM volatile("dmsr33 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19056. return result;
  19057. }
  19058. /* ===== Inline Function End for DMSR33 ===== */
  19059. /* ===== Inline Function Start for DMXSR33 ===== */
  19060. /**
  19061. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief DMXSR33 (Signed Cross Multiply with Right Shift 33-bit)
  19063. * \details
  19064. * **Type**: SIMD
  19065. *
  19066. * **Syntax**:\n
  19067. * ~~~
  19068. * DMXSR33 Rd, Rs1, Rs2
  19069. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  19070. * ~~~
  19071. *
  19072. * **Purpose**:\n
  19073. * Do two signed 32-bit cross multiplications from the 32-bit elements of two registers, and each multiplications performs a
  19074. * right shift operation.
  19075. *
  19076. * **Description**:\n
  19077. * For the `DMXSR33` instruction, multiply the top 32-bit Q31 content of 64-bit chunks in Rs1 with the bottom 32-bit Q31
  19078. * content of 64-bit chunks in Rs2. At the same time, multiply the bottom 32-bit Q31 content of 64-bit chunks in Rs1 with
  19079. * the top 32-bit Q31 content of 64-bit chunks in Rs2.
  19080. * The Q63 results are then right-shifted 33-bits and clipped to Q31 values. The Q31 results are then written into Rd.
  19081. *
  19082. * **Operations**:\n
  19083. * ~~~
  19084. * Rd.W[0] = (Rs1.W[0] s* Rs2.W[1]) s>> 33
  19085. * Rd.W[1] = (Rs1.W[1] s* Rs2.W[0]) s>> 33
  19086. * ~~~
  19087. *
  19088. * \param [in] a unsigned long long type of value stored in a
  19089. * \param [in] b unsigned long long type of value stored in b
  19090. * \return value stored in unsigned long long type
  19091. */
  19092. __STATIC_FORCEINLINE unsigned long long __RV_DMXSR33(unsigned long long a, unsigned long long b)
  19093. {
  19094. unsigned long long result;
  19095. __ASM volatile("dmxsr33 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19096. return result;
  19097. }
  19098. /* ===== Inline Function End for DMXSR33 ===== */
  19099. /* ===== Inline Function Start for DREDAS16 ===== */
  19100. /**
  19101. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19102. * \brief DREDAS16 (Reduced Addition and Reduced Subtraction)
  19103. * \details
  19104. * **Type**: SIMD
  19105. *
  19106. * **Syntax**:\n
  19107. * ~~~
 * DREDAS16 Rd, Rs1
 * # Rs1 is an even/odd pair of registers
  19110. * ~~~
  19111. *
  19112. * **Purpose**:\n
  19113. * Do halfs reduced subtraction and halfs reduced addition from a register. The result is written to Rd.
  19114. *
  19115. * **Description**:\n
 * For the `DREDAS16` instruction, subtract the top 16-bit Q15 element from the bottom 16-bit Q15 element of the bottom
 * 32-bit Q31 content of 64-bit chunks in Rs1. At the same time, add the top 16-bit Q15 element to the bottom 16-bit
 * Q15 element of the top 32-bit Q31 content of 64-bit chunks in Rs1. The two Q15 results are then written into Rd.
  19119. *
  19120. * **Operations**:\n
  19121. * ~~~
  19122. * Rd.H[0] = Rs1.H[0] - Rs1.H[1]
  19123. * Rd.H[1] = Rs1.H[2] + Rs1.H[3]
  19124. * ~~~
  19125. *
  19126. * \param [in] a unsigned long long type of value stored in a
  19127. * \return value stored in unsigned long type
  19128. */
  19129. __STATIC_FORCEINLINE unsigned long __RV_DREDAS16(unsigned long long a)
  19130. {
  19131. unsigned long result;
  19132. __ASM volatile("dredas16 %0, %1" : "=r"(result) : "r"(a));
  19133. return result;
  19134. }
  19135. /* ===== Inline Function End for DREDAS16 ===== */
  19136. /* ===== Inline Function Start for DREDSA16 ===== */
  19137. /**
  19138. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19139. * \brief DREDSA16 (Reduced Subtraction and Reduced Addition)
  19140. * \details
  19141. * **Type**: SIMD
  19142. *
  19143. * **Syntax**:\n
  19144. * ~~~
 * DREDSA16 Rd, Rs1
 * # Rs1 is an even/odd pair of registers
  19147. * ~~~
  19148. *
  19149. * **Purpose**:\n
  19150. * Do halfs reduced subtraction and halfs reduced addition from a register. The result is written to Rd.
  19151. *
  19152. * **Description**:\n
 * For the `DREDSA16` instruction, add the top 16-bit Q15 element to the bottom 16-bit Q15 element of the bottom
 * 32-bit Q31 content of 64-bit chunks in Rs1. At the same time, subtract the top 16-bit Q15 element from the bottom 16-bit
 * Q15 element of the top 32-bit Q31 content of 64-bit chunks in Rs1. The two Q15 results are then written into Rd.
  19154. *
  19155. * **Operations**:\n
  19156. * ~~~
  19157. * Rd.H[0] = Rs1.H[0] + Rs1.H[1]
  19158. * Rd.H[1] = Rs1.H[2] - Rs1.H[3]
  19159. * ~~~
  19160. *
 * \param [in] a unsigned long long type of value stored in a
  19162. * \return value stored in unsigned long type
  19163. */
  19164. __STATIC_FORCEINLINE unsigned long __RV_DREDSA16(unsigned long long a)
  19165. {
  19166. unsigned long result;
  19167. __ASM volatile("dredsa16 %0, %1" : "=r"(result) : "r"(a));
  19168. return result;
  19169. }
  19170. /* ===== Inline Function End for DREDSA16 ===== */
  19171. /* ===== Inline Function Start for DKCLIP64 ===== */
  19172. /**
  19173. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19174. * \brief DKCLIP64 (64-bit Clipped to 16-bit Saturation Value)
  19175. * \details
  19176. * **Type**: SIMD
  19177. *
  19178. * **Syntax**:\n
  19179. * ~~~
 * DKCLIP64 Rd, Rs1
 * # Rs1 is an even/odd pair of registers
  19182. * ~~~
  19183. *
  19184. * **Purpose**:\n
 * Do 15-bit element arithmetic right shift operations and limit result into 32-bit int, then do saturate operation to 16-bit and
 * clip result to 16-bit Q15.
  19187. *
  19188. * **Description**:\n
 * For the `DKCLIP64` instruction, shift the input 15 bits to the right and convert the result to 32-bit int type, after
 * which the value is saturated to limit the data to between 2^15-1 and -2^15. The result is converted to 16-bit Q15 type. The
 * final results are written to Rd.
  19192. *
  19193. * **Operations**:\n
  19194. * ~~~
  19195. * const int32_t max = (int32_t)((1U << 15U) - 1U);
  19196. * const int32_t min = -1 - max ;
  19197. * int32_t val = (int32_t)(Rs s>> 15);
  19198. * if (val > max) {
  19199. * Rd = max;
  19200. * } else if (val < min) {
  19201. * Rd = min;
  19202. * } else {
  19203. * Rd = (int16_t)val;
  19204. * }
  19205. * ~~~
  19206. *
  19207. * \param [in] a unsigned long long type of value stored in a
  19208. * \return value stored in int16_t type
  19209. */
  19210. __STATIC_FORCEINLINE int16_t __RV_DKCLIP64(unsigned long long a)
  19211. {
  19212. int16_t result;
  19213. __ASM volatile("dkclip64 %0, %1" : "=r"(result) : "r"(a));
  19214. return result;
  19215. }
  19216. /* ===== Inline Function End for DKCLIP64 ===== */
  19217. /* ===== Inline Function Start for DKMDA ===== */
  19218. /**
  19219. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19220. * \brief DKMDA (Signed Multiply Two Halfs and Add)
  19221. * \details
  19222. * **Type**: SIMD
  19223. *
  19224. * **Syntax**:\n
  19225. * ~~~
  19226. * DKMDA Rd, Rs1, Rs2
  19227. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  19228. * ~~~
  19229. *
  19230. * **Purpose**:\n
  19231. * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then adds the two 32-bit results together.
  19232. * The addition result may be saturated.
  19233. *
  19234. * **Description**:\n
  19235. * This instruction multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of the
  19236. * 32-bit elements of Rs2 and then adds the result to the result of multiplying the top 16-bit content of the 32-bit elements of
  19237. * Rs1 with the top 16-bit content of the 32-bit elements of Rs2.
 * The addition result is checked for saturation. If saturation happens, the result is saturated to 2^31-1. The final results are
 * written to Rd. The 16-bit contents are treated as signed integers.
  19240. *
  19241. * **Operations**:\n
  19242. * ~~~
  19243. * if (Rs1.W[x] != 0x80008000) or (Rs2.W[x] != 0x80008000){
  19244. * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[1]) + (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
  19245. * } else {
  19246. * Rd.W[x] = 0x7fffffff;
  19247. * OV = 1;
  19248. * }
  19249. * x=1...0
  19250. * ~~~
  19251. *
  19252. * \param [in] a unsigned long long type of value stored in a
  19253. * \param [in] b unsigned long long type of value stored in b
  19254. * \return value stored in unsigned long long type
  19255. */
  19256. __STATIC_FORCEINLINE unsigned long long __RV_DKMDA(unsigned long long a, unsigned long long b)
  19257. {
  19258. unsigned long long result;
  19259. __ASM volatile("dkmda %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19260. return result;
  19261. }
  19262. /* ===== Inline Function End for DKMDA ===== */
  19263. /* ===== Inline Function Start for DKMXDA ===== */
  19264. /**
  19265. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19266. * \brief DKMXDA (Signed Crossed Multiply Two Halfs and Add)
  19267. * \details
  19268. * **Type**: SIMD
  19269. *
  19270. * **Syntax**:\n
  19271. * ~~~
  19272. * DKMXDA Rd, Rs1, Rs2
  19273. * ~~~
  19274. *
  19275. * **Purpose**:\n
  19276. * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then adds the two 32-bit results together.
  19277. * The addition result may be saturated.
  19278. * * DKMXDA: top*bottom + top*bottom (per 32-bit element)
  19279. *
  19280. * **Description**:\n
  19281. * This instruction multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit
  19282. * elements of Rs2 and then adds the result to the result of multiplying the top 16-bit content of the 32-bit elements of Rs1
  19283. * with the bottom 16-bit content of the 32-bit elements of Rs2.
 * The addition result is checked for saturation. If saturation happens, the result is saturated to 2^31-1. The final results are
  19285. * written to Rd. The 16-bit contents are treated as signed integers.
  19286. *
  19287. * **Operations**:\n
  19288. * ~~~
  19289. * if (Rs1.W[x] != 0x80008000) or (Rs2.W[x] != 0x80008000){
  19290. * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) + (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
  19291. * } else {
  19292. * Rd.W[x] = 0x7fffffff;
  19293. * OV = 1;
  19294. * }
  19295. * x=1...0
  19296. * ~~~
  19297. *
  19298. * \param [in] a unsigned long long type of value stored in a
  19299. * \param [in] b unsigned long long type of value stored in b
  19300. * \return value stored in unsigned long long type
  19301. */
  19302. __STATIC_FORCEINLINE unsigned long long __RV_DKMXDA(unsigned long long a, unsigned long long b)
  19303. {
  19304. unsigned long long result;
  19305. __ASM volatile("dkmxda %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19306. return result;
  19307. }
  19308. /* ===== Inline Function End for DKMXDA ===== */
  19309. /* ===== Inline Function Start for DSMDRS ===== */
  19310. /**
  19311. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19312. * \brief DSMDRS (Signed Multiply Two Halfs and Reverse Subtract)
  19313. * \details
  19314. * **Type**: SIMD
  19315. *
  19316. * **Syntax**:\n
  19317. * ~~~
  19318. * DSMDRS Rd, Rs1, Rs2
  19319. * ~~~
  19320. *
  19321. * **Purpose**:\n
  19322. * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then perform a subtraction operation
  19323. * between the two 32-bit results.
  19324. * * DSMDRS: bottom*bottom - top*top (per 32-bit element)
  19325. *
  19326. * **Description**:\n
  19327. * This instruction multiplies the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit
  19328. * elements of Rs2 and then subtracts the result from the result of multiplying the bottom 16-bit content of the 32-bit elements
  19329. * of Rs1 with the bottom 16-bit content of the 32-bit elements of Rs2.
  19330. * The subtraction result is written to the corresponding 32-bit element of Rd (The 16-bit contents of multiplication are
  19331. * treated as signed integers).
  19332. *
  19333. * **Operations**:\n
  19334. * ~~~
  19335. * Rd.W[x] = (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]); x = 1...0
  19336. * ~~~
  19337. *
  19338. * \param [in] a unsigned long long type of value stored in a
  19339. * \param [in] b unsigned long long type of value stored in b
  19340. * \return value stored in unsigned long long type
  19341. */
  19342. __STATIC_FORCEINLINE unsigned long long __RV_DSMDRS(unsigned long long a, unsigned long long b)
  19343. {
  19344. unsigned long long result;
  19345. __ASM volatile("dsmdrs %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19346. return result;
  19347. }
  19348. /* ===== Inline Function End for DSMDRS ===== */
  19349. /* ===== Inline Function Start for DSMXDS ===== */
  19350. /**
  19351. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19352. * \brief DSMXDS (Signed Crossed Multiply Two Halfs and Subtract)
  19353. * \details
  19354. * **Type**: SIMD
  19355. *
  19356. * **Syntax**:\n
  19357. * ~~~
  19358. * DSMXDS Rd, Rs1, Rs2
  19359. * ~~~
  19360. *
  19361. * **Purpose**:\n
  19362. * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then perform a subtraction operation
  19363. * between the two 32-bit results.
  19364. * * DSMXDS: top*bottom - bottom*top (per 32-bit element)
  19365. *
  19366. * **Description**:\n
  19367. * This instruction multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit
  19368. * elements of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of the 32-bit elements
  19369. * of Rs1 with the bottom 16-bit content of the 32-bit elements of Rs2.
  19370. * The subtraction result is written to the corresponding 32-bit element of Rd. The 16-bit contents of multiplication are
  19371. * treated as signed integers.
  19372. *
  19373. * **Operations**:\n
  19374. * ~~~
  19375. * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]); x = 1...0
  19376. * ~~~
  19377. *
  19378. * \param [in] a unsigned long long type of value stored in a
  19379. * \param [in] b unsigned long long type of value stored in b
  19380. * \return value stored in unsigned long long type
  19381. */
  19382. __STATIC_FORCEINLINE unsigned long long __RV_DSMXDS(unsigned long long a, unsigned long long b)
  19383. {
  19384. unsigned long long result;
  19385. __ASM volatile("dsmxds %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19386. return result;
  19387. }
  19388. /* ===== Inline Function End for DSMXDS ===== */
  19389. /* ===== Inline Function Start for DSMBB32 ===== */
  19390. /**
  19391. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19392. * \brief DSMBB32 (Signed Multiply Bottom Word & Bottom Word)
  19393. * \details
  19394. * **Type**: SIMD
  19395. *
  19396. * **Syntax**:\n
  19397. * ~~~
  19398. * DSMBB32 Rd, Rs1, Rs2
  19399. * ~~~
  19400. *
  19401. * **Purpose**:\n
  19402. * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register and write the 64-bit result to a third register.
  19403. * * DSMBB32: bottom*bottom
  19404. *
  19405. * **Description**:\n
  19406. * This instruction multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit element of Rs2. The 64-bit multiplication result is written to Rd.
  19407. * The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
  19408. *
  19409. * **Operations**:\n
  19410. * ~~~
  19411. * res = (Rs1.W[0] * Rs2.W[0]);
  19412. * Rd = res;
  19413. * ~~~
  19414. *
  19415. * \param [in] a unsigned long long type of value stored in a
  19416. * \param [in] b unsigned long long type of value stored in b
  19417. * \return value stored in long long type
  19418. */
  19419. __STATIC_FORCEINLINE long long __RV_DSMBB32(unsigned long long a, unsigned long long b)
  19420. {
  19421. long long result;
  19422. __ASM volatile("dsmbb32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19423. return result;
  19424. }
  19425. /* ===== Inline Function End for DSMBB32 ===== */
  19426. /* ===== Inline Function Start for DSMBB32.sra14 ===== */
  19427. /**
  19428. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief DSMBB32.sra14 (Signed Multiply Bottom Word & Bottom Word with Right Shift 14)
  19430. * \details
  19431. * **Type**: SIMD
  19432. *
  19433. * **Syntax**:\n
  19434. * ~~~
  19435. * DSMBB32.sra14 Rd, Rs1, Rs2
  19436. * ~~~
  19437. *
  19438. * **Purpose**:\n
 * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register, then right shift
 * 14-bit, finally write the 64-bit result to a third register.
  19441. * * DSMBB32.sra14: bottom*bottom s>> 14
  19442. *
  19443. * **Description**:\n
  19444. * This instruction multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit element of Rs2. The 64-bit multiplication result is written to Rd after right shift 14-bit.
  19445. * The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
  19446. *
  19447. * **Operations**:\n
  19448. * ~~~
  19449. * res = (Rs1.W[0] * Rs2.W[0]) s>> 14;
  19450. * Rd = res;
  19451. * ~~~
  19452. *
  19453. * \param [in] a unsigned long long type of value stored in a
  19454. * \param [in] b unsigned long long type of value stored in b
  19455. * \return value stored in long long type
  19456. */
  19457. __STATIC_FORCEINLINE long long __RV_DSMBB32_SRA14(unsigned long long a, unsigned long long b)
  19458. {
  19459. long long result;
  19460. __ASM volatile("dsmbb32.sra14 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19461. return result;
  19462. }
  19463. /* ===== Inline Function End for DSMBB32.sra14 ===== */
  19464. /* ===== Inline Function Start for DSMBB32.sra32 ===== */
  19465. /**
  19466. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief DSMBB32.sra32 (Signed Multiply Bottom Word & Bottom Word with Right Shift 32)
  19468. * \details
  19469. * **Type**: SIMD
  19470. *
  19471. * **Syntax**:\n
  19472. * ~~~
  19473. * DSMBB32.sra32 Rd, Rs1, Rs2
  19474. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  19475. * ~~~
  19476. *
  19477. * **Purpose**:\n
 * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register, then right shift
 * 32-bit, finally write the 64-bit result to a third register.
 * * DSMBB32.sra32: bottom*bottom s>> 32
  19481. *
  19482. * **Description**:\n
  19483. * This instruction multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit element of Rs2.
  19484. * The 64-bit multiplication result is written to Rd after right shift 32-bit. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
  19485. *
  19486. * **Operations**:\n
  19487. * ~~~
  19488. * res = (Rs1.W[0] * Rs2.W[0]) s>> 32;
  19489. * Rd = res;
  19490. * ~~~
  19491. *
  19492. * \param [in] a unsigned long long type of value stored in a
  19493. * \param [in] b unsigned long long type of value stored in b
  19494. * \return value stored in long long type
  19495. */
  19496. __STATIC_FORCEINLINE long long __RV_DSMBB32_SRA32(unsigned long long a, unsigned long long b)
  19497. {
  19498. long long result;
  19499. __ASM volatile("dsmbb32.sra32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19500. return result;
  19501. }
  19502. /* ===== Inline Function End for DSMBB32.sra32 ===== */
  19503. /* ===== Inline Function Start for DSMBT32 ===== */
  19504. /**
  19505. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief DSMBT32 (Signed Multiply Bottom Word & Top Word)
  19507. * \details
  19508. * **Type**: SIMD
  19509. *
  19510. * **Syntax**:\n
  19511. * ~~~
  19512. * DSMBT32 Rd, Rs1, Rs2
  19513. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  19514. * ~~~
  19515. *
  19516. * **Purpose**:\n
  19517. * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register and write the 64-bit
  19518. * result to a third register.
  19519. * * DSMBT32: bottom*top
  19520. *
  19521. * **Description**:\n
  19522. * This instruction multiplies the bottom 32-bit element of Rs1 with the top 32-bit element of Rs2. The 64-bit multiplication
  19523. * result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
  19524. *
  19525. * **Operations**:\n
  19526. * ~~~
 * res = (Rs1.W[0] * Rs2.W[1]);
  19528. * Rd = res;
  19529. * ~~~
  19530. *
  19531. * \param [in] a unsigned long long type of value stored in a
  19532. * \param [in] b unsigned long long type of value stored in b
  19533. * \return value stored in long long type
  19534. */
  19535. __STATIC_FORCEINLINE long long __RV_DSMBT32(unsigned long long a, unsigned long long b)
  19536. {
  19537. long long result;
  19538. __ASM volatile("dsmbt32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19539. return result;
  19540. }
  19541. /* ===== Inline Function End for DSMBT32 ===== */
  19542. /* ===== Inline Function Start for DSMBT32.sra14 ===== */
  19543. /**
  19544. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19545. * \brief DSMBT32.sra14 (Signed Multiply Bottom Word & Top Word with Right Shift 14)
  19546. * \details
  19547. * **Type**: SIMD
  19548. *
  19549. * **Syntax**:\n
  19550. * ~~~
  19551. * DSMBT32.sra14 Rd, Rs1, Rs2
  19552. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  19553. * ~~~
  19554. *
  19555. * **Purpose**:\n
 * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register, then right shift
 * 14-bit, finally write the 64-bit result to a third register.
 * * DSMBT32.sra14: bottom*top s>> 14
  19559. *
  19560. * **Description**:\n
  19561. * This instruction multiplies the bottom 32-bit element of Rs1 with the top 32-bit element of Rs2. The 64-bit multiplication
  19562. * result is written to Rd after right shift 14-bit. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
  19563. *
  19564. * **Operations**:\n
  19565. * ~~~
 * res = (Rs1.W[0] * Rs2.W[1]) s>> 14;
  19567. * Rd = res;
  19568. * ~~~
  19569. *
  19570. * \param [in] a unsigned long long type of value stored in a
  19571. * \param [in] b unsigned long long type of value stored in b
  19572. * \return value stored in long long type
  19573. */
  19574. __STATIC_FORCEINLINE long long __RV_DSMBT32_SRA14(unsigned long long a, unsigned long long b)
  19575. {
  19576. long long result;
  19577. __ASM volatile("dsmbt32.sra14 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19578. return result;
  19579. }
  19580. /* ===== Inline Function End for DSMBT32.sra14 ===== */
  19581. /* ===== Inline Function Start for DSMBT32.sra32 ===== */
  19582. /**
  19583. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
 * \brief DSMBT32.sra32 (Signed Multiply Bottom Word & Top Word with Right Shift 32)
  19585. * \details
  19586. * **Type**: SIMD
  19587. *
  19588. * **Syntax**:\n
  19589. * ~~~
  19590. * DSMBT32.sra32 Rd, Rs1, Rs2
  19591. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  19592. * ~~~
  19593. *
  19594. * **Purpose**:\n
 * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register, then right shift
 * 32-bit, finally write the 64-bit result to a third register.
 * * DSMBT32.sra32: bottom*top s>> 32
  19598. *
  19599. * **Description**:\n
  19600. * This instruction multiplies the bottom 32-bit element of Rs1 with the top 32-bit element of Rs2. The 64-bit multiplication
  19601. * result is written to Rd after right shift 32-bit. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
  19602. *
  19603. * **Operations**:\n
  19604. * ~~~
 * res = (Rs1.W[0] * Rs2.W[1]) s>> 32;
  19606. * Rd = res;
  19607. * ~~~
  19608. *
  19609. * \param [in] a unsigned long long type of value stored in a
  19610. * \param [in] b unsigned long long type of value stored in b
  19611. * \return value stored in long long type
  19612. */
  19613. __STATIC_FORCEINLINE long long __RV_DSMBT32_SRA32(unsigned long long a, unsigned long long b)
  19614. {
  19615. long long result;
  19616. __ASM volatile("dsmbt32.sra32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19617. return result;
  19618. }
  19619. /* ===== Inline Function End for DSMBT32.sra32 ===== */
  19620. /* ===== Inline Function Start for DSMTT32 ===== */
  19621. /**
  19622. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19623. * \brief DSMTT32 (Signed Multiply Top Word & Top Word)
  19624. * \details
  19625. * **Type**: SIMD
  19626. *
  19627. * **Syntax**:\n
  19628. * ~~~
  19629. * DSMTT32 Rd, Rs1, Rs2
  19630. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  19631. * ~~~
  19632. *
  19633. * **Purpose**:\n
  19634. * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register and write the 64-bit
  19635. * result to a third register.
  19636. * * DSMTT32: top*top
  19637. *
  19638. * **Description**:\n
  19639. * This instruction multiplies the top 32-bit element of Rs1 with the top 32-bit element of Rs2. The 64-bit multiplication
  19640. * result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
  19641. *
  19642. * **Operations**:\n
  19643. * ~~~
  19644. * res = Rs1.W[1] * Rs2.W[1];
  19645. * Rd = res;
  19646. * ~~~
  19647. *
  19648. * \param [in] a unsigned long long type of value stored in a
  19649. * \param [in] b unsigned long long type of value stored in b
  19650. * \return value stored in long long type
  19651. */
  19652. __STATIC_FORCEINLINE long long __RV_DSMTT32(unsigned long long a, unsigned long long b)
  19653. {
  19654. long long result;
  19655. __ASM volatile("dsmtt32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19656. return result;
  19657. }
  19658. /* ===== Inline Function End for DSMTT32 ===== */
  19659. /* ===== Inline Function Start for DSMTT32.sra14 ===== */
  19660. /**
  19661. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19662. * \brief DSMTT32.sra14 (Signed Multiply Top Word & Top Word with Right Shift 14-bit)
  19663. * \details
  19664. * **Type**: SIMD
  19665. *
  19666. * **Syntax**:\n
  19667. * ~~~
  19668. * DSMTT32.sra14 Rd, Rs1, Rs2
  19669. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  19670. * ~~~
  19671. *
  19672. * **Purpose**:\n
 * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register, then right shift 14-bit,
 * finally write the 64-bit result to a third register.
  19675. * * DSMTT32.sra14: top*top s>> 14
  19676. *
  19677. * **Description**:\n
  19678. * This instruction multiplies the top 32-bit element of Rs1 with the top 32-bit element of Rs2. The 64-bit multiplication
  19679. * result is written to Rd after right shift 14-bit. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
  19680. *
  19681. * **Operations**:\n
  19682. * ~~~
 * res = (Rs1.W[1] * Rs2.W[1]) s>> 14;
  19684. * Rd = res;
  19685. * ~~~
  19686. *
  19687. * \param [in] a unsigned long long type of value stored in a
  19688. * \param [in] b unsigned long long type of value stored in b
  19689. * \return value stored in long long type
  19690. */
  19691. __STATIC_FORCEINLINE long long __RV_DSMTT32_SRA14(unsigned long long a, unsigned long long b)
  19692. {
  19693. long long result;
  19694. __ASM volatile("dsmtt32.sra14 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19695. return result;
  19696. }
  19697. /* ===== Inline Function End for DSMTT32.sra14 ===== */
  19698. /* ===== Inline Function Start for DSMTT32.sra32 ===== */
  19699. /**
  19700. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19701. * \brief DSMTT32.sra32 (Signed Multiply Top Word & Top Word with Right Shift 32-bit)
  19702. * \details
  19703. * **Type**: SIMD
  19704. *
  19705. * **Syntax**:\n
  19706. * ~~~
  19707. * DSMTT32.sra32 Rd, Rs1, Rs2
  19708. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  19709. * ~~~
  19710. *
  19711. * **Purpose**:\n
 * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register, then right shift 32-bit,
 * finally write the 64-bit result to a third register.
  19714. * * DSMTT32.sra32: top*top s>> 32
  19715. *
  19716. * **Description**:\n
  19717. * This instruction multiplies the top 32-bit element of Rs1 with the top 32-bit element of Rs2. The 64-bit multiplication
  19718. * result is written to Rd after right shift 32-bit. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
  19719. *
  19720. * **Operations**:\n
  19721. * ~~~
 * res = (Rs1.W[1] * Rs2.W[1]) s>> 32;
  19723. * Rd = res;
  19724. * ~~~
  19725. *
  19726. * \param [in] a unsigned long long type of value stored in a
  19727. * \param [in] b unsigned long long type of value stored in b
  19728. * \return value stored in long long type
  19729. */
  19730. __STATIC_FORCEINLINE long long __RV_DSMTT32_SRA32(unsigned long long a, unsigned long long b)
  19731. {
  19732. long long result;
  19733. __ASM volatile("dsmtt32.sra32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19734. return result;
  19735. }
  19736. /* ===== Inline Function End for DSMTT32.sra32 ===== */
  19737. /* ===== Inline Function Start for DPKBB32 ===== */
  19738. /**
  19739. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19740. * \brief DPKBB32 (Pack Two 32-bit Data from Both Bottom Half)
  19741. * \details
  19742. * **Type**: SIMD
  19743. *
  19744. * **Syntax**:\n
  19745. * ~~~
  19746. * DPKBB32 Rd, Rs1, Rs2
  19747. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  19748. * ~~~
  19749. *
  19750. * **Purpose**:\n
  19751. * Pack 32-bit data from 64-bit chunks in two registers.
  19752. * * DPKBB32: bottom.bottom
  19753. *
  19754. * **Description**:\n
  19755. * This instruction moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
  19756. *
  19757. * **Operations**:\n
  19758. * ~~~
  19759. * Rd = CONCAT(Rs1.W[0], Rs2.W[0]);
  19760. * ~~~
  19761. *
  19762. * \param [in] a unsigned long long type of value stored in a
  19763. * \param [in] b unsigned long long type of value stored in b
  19764. * \return value stored in unsigned long long type
  19765. */
  19766. __STATIC_FORCEINLINE unsigned long long __RV_DPKBB32(unsigned long long a, unsigned long long b)
  19767. {
  19768. unsigned long long result;
  19769. __ASM volatile("dpkbb32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19770. return result;
  19771. }
  19772. /* ===== Inline Function End for DPKBB32 ===== */
  19773. /* ===== Inline Function Start for DPKBT32 ===== */
  19774. /**
  19775. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19776. * \brief DPKBT32 (Pack Two 32-bit Data from Bottom and Top Half)
  19777. * \details
  19778. * **Type**: SIMD
  19779. *
  19780. * **Syntax**:\n
  19781. * ~~~
  19782. * DPKBT32 Rd, Rs1, Rs2
  19783. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  19784. * ~~~
  19785. *
  19786. * **Purpose**:\n
  19787. * Pack 32-bit data from 64-bit chunks in two registers.
  19788. * * DPKBT32: bottom.top
  19789. *
  19790. * **Description**:\n
  19791. * This instruction moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
  19792. *
  19793. * **Operations**:\n
  19794. * ~~~
  19795. * Rd = CONCAT(Rs1.W[0], Rs2.W[1]);
  19796. * ~~~
  19797. *
  19798. * \param [in] a unsigned long long type of value stored in a
  19799. * \param [in] b unsigned long long type of value stored in b
  19800. * \return value stored in unsigned long long type
  19801. */
  19802. __STATIC_FORCEINLINE unsigned long long __RV_DPKBT32(unsigned long long a, unsigned long long b)
  19803. {
  19804. unsigned long long result;
  19805. __ASM volatile("dpkbt32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19806. return result;
  19807. }
  19808. /* ===== Inline Function End for DPKBT32 ===== */
  19809. /* ===== Inline Function Start for DPKTT32 ===== */
  19810. /**
  19811. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19812. * \brief DPKTT32 (Pack Two 32-bit Data from Both Top Half)
  19813. * \details
  19814. * **Type**: SIMD
  19815. *
  19816. * **Syntax**:\n
  19817. * ~~~
  19818. * DPKTT32 Rd, Rs1, Rs2
  19819. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  19820. * ~~~
  19821. *
  19822. * **Purpose**:\n
  19823. * Pack 32-bit data from 64-bit chunks in two registers.
  19824. * * DPKTT32: top.top
  19825. *
  19826. * **Description**:\n
  19827. * This instruction moves Rs1.W[1] to Rd.W[0] and moves Rs2.W[1] to Rd.W[0].
  19828. *
  19829. * **Operations**:\n
  19830. * ~~~
  19831. * Rd = CONCAT(Rs1.W[1], Rs2.W[1]);
  19832. * ~~~
  19833. *
  19834. * \param [in] a unsigned long long type of value stored in a
  19835. * \param [in] b unsigned long long type of value stored in b
  19836. * \return value stored in unsigned long long type
  19837. */
  19838. __STATIC_FORCEINLINE unsigned long long __RV_DPKTT32(unsigned long long a, unsigned long long b)
  19839. {
  19840. unsigned long long result;
  19841. __ASM volatile("dpktt32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19842. return result;
  19843. }
  19844. /* ===== Inline Function End for DPKTT32 ===== */
  19845. /* ===== Inline Function Start for DPKTB32 ===== */
  19846. /**
  19847. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19848. * \brief DPKTB32 (Pack Two 32-bit Data from Top and Bottom Half)
  19849. * \details
  19850. * **Type**: SIMD
  19851. *
  19852. * **Syntax**:\n
  19853. * ~~~
  19854. * DPKTB32 Rd, Rs1, Rs2
  19855. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  19856. * ~~~
  19857. *
  19858. * **Purpose**:\n
  19859. * Pack 32-bit data from 64-bit chunks in two registers.
  19860. * * DPKTB32: top.bottom
  19861. *
  19862. * **Description**:\n
  19863. * This instruction moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
  19864. *
  19865. * **Operations**:\n
  19866. * ~~~
  19867. * Rd = CONCAT(Rs1.W[1], Rs2.W[0]);
  19868. * ~~~
  19869. *
  19870. * \param [in] a unsigned long long type of value stored in a
  19871. * \param [in] b unsigned long long type of value stored in b
  19872. * \return value stored in unsigned long long type
  19873. */
  19874. __STATIC_FORCEINLINE unsigned long long __RV_DPKTB32(unsigned long long a, unsigned long long b)
  19875. {
  19876. unsigned long long result;
  19877. __ASM volatile("dpktb32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19878. return result;
  19879. }
  19880. /* ===== Inline Function End for DPKTB32 ===== */
  19881. /* ===== Inline Function Start for DPKTB16 ===== */
  19882. /**
  19883. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19884. * \brief DPKTB16 (Pack Two 32-bit Data from Top and Bottom Half)
  19885. * \details
  19886. * **Type**: SIMD
  19887. *
  19888. * **Syntax**:\n
  19889. * ~~~
  19890. * DPKTB16 Rd, Rs1, Rs2
  19891. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  19892. * ~~~
  19893. *
  19894. * **Purpose**:\n
  19895. * Pack 16-bit data from 32-bit chunks in two registers.
  19896. * * DPKTB16: top.bottom
  19897. *
  19898. * **Description**:\n
  19899. * This instruction moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0].
  19900. *
  19901. * **Operations**:\n
  19902. * ~~~
  19903. * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][15:0]);
  19904. * x=1...0
  19905. * ~~~
  19906. *
  19907. * \param [in] a unsigned long long type of value stored in a
  19908. * \param [in] b unsigned long long type of value stored in b
  19909. * \return value stored in unsigned long long type
  19910. */
  19911. __STATIC_FORCEINLINE unsigned long long __RV_DPKTB16(unsigned long long a, unsigned long long b)
  19912. {
  19913. unsigned long long result;
  19914. __ASM volatile("dpktb16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19915. return result;
  19916. }
  19917. /* ===== Inline Function End for DPKTB16 ===== */
  19918. /* ===== Inline Function Start for DPKBB16 ===== */
  19919. /**
  19920. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19921. * \brief DPKBB16 (Pack Two 16-bit Data from Both Bottom Half)
  19922. * \details
  19923. * **Type**: SIMD
  19924. *
  19925. * **Syntax**:\n
  19926. * ~~~
  19927. * DPKBB16 Rd, Rs1, Rs2
  19928. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  19929. * ~~~
  19930. *
  19931. * **Purpose**:\n
  19932. * Pack 16-bit data from 32-bit chunks in two registers.
  19933. * * PKBB16: bottom.bottom
  19934. *
  19935. * **Description**:\n
  19936. * This instruction moves Rs1.W[x][15:0] to Rd.W[x][31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0].
  19937. *
  19938. * **Operations**:\n
  19939. * ~~~
  19940. * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][15:0]);
  19941. * x=1...0
  19942. * ~~~
  19943. *
  19944. * \param [in] a unsigned long long type of value stored in a
  19945. * \param [in] b unsigned long long type of value stored in b
  19946. * \return value stored in unsigned long long type
  19947. */
  19948. __STATIC_FORCEINLINE unsigned long long __RV_DPKBB16(unsigned long long a, unsigned long long b)
  19949. {
  19950. unsigned long long result;
  19951. __ASM volatile("dpkbb16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19952. return result;
  19953. }
  19954. /* ===== Inline Function End for DPKBB16 ===== */
  19955. /* ===== Inline Function Start for DPKBT16 ===== */
  19956. /**
  19957. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19958. * \brief DPKBT16 (Pack Two 16-bit Data from Bottom and Top Half)
  19959. * \details
  19960. * **Type**: SIMD
  19961. *
  19962. * **Syntax**:\n
  19963. * ~~~
  19964. * DPKBT16 Rd, Rs1, Rs2
  19965. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  19966. * ~~~
  19967. *
  19968. * **Purpose**:\n
  19969. * Pack 16-bit data from 32-bit chunks in two registers.
  19970. * * PKBT16: bottom.top
  19971. *
  19972. * **Description**:\n
  19973. * This instruction moves Rs1.W[x] [15:0] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
  19974. *
  19975. * **Operations**:\n
  19976. * ~~~
  19977. * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][31:16]);
  19978. * x=1...0
  19979. * ~~~
  19980. *
  19981. * \param [in] a unsigned long long type of value stored in a
  19982. * \param [in] b unsigned long long type of value stored in b
  19983. * \return value stored in unsigned long long type
  19984. */
  19985. __STATIC_FORCEINLINE unsigned long long __RV_DPKBT16(unsigned long long a, unsigned long long b)
  19986. {
  19987. unsigned long long result;
  19988. __ASM volatile("dpkbt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19989. return result;
  19990. }
  19991. /* ===== Inline Function End for DPKBT16 ===== */
  19992. /* ===== Inline Function Start for DPKTT16 ===== */
  19993. /**
  19994. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19995. * \brief DPKTT16 (Pack Two 16-bit Data from Both Top Half)
  19996. * \details
  19997. * **Type**: SIMD
  19998. *
  19999. * **Syntax**:\n
  20000. * ~~~
  20001. * DPKTT16 Rd, Rs1, Rs2
  20002. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20003. * ~~~
  20004. *
  20005. * **Purpose**:\n
  20006. * Pack 16-bit data from 32-bit chunks in two registers.
  20007. * * PKTT16 top.top
  20008. *
  20009. * **Description**:\n
  20010. * This instruction moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
  20011. *
  20012. * **Operations**:\n
  20013. * ~~~
  20014. * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][31:16]);
  20015. * x=1...0
  20016. * ~~~
  20017. *
  20018. * \param [in] a unsigned long long type of value stored in a
  20019. * \param [in] b unsigned long long type of value stored in b
  20020. * \return value stored in unsigned long long type
  20021. */
  20022. __STATIC_FORCEINLINE unsigned long long __RV_DPKTT16(unsigned long long a, unsigned long long b)
  20023. {
  20024. unsigned long long result;
  20025. __ASM volatile("dpktt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20026. return result;
  20027. }
  20028. /* ===== Inline Function End for DPKTT16 ===== */
  20029. /* ===== Inline Function Start for DSRA16 ===== */
  20030. /**
  20031. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20032. * \brief DSRA16 (SIMD 16-bit Shift Right Arithmetic)
  20033. * \details
  20034. * **Type**: SIMD
  20035. *
  20036. * **Syntax**:\n
  20037. * ~~~
  20038. * DSRA16 Rd, Rs1, Rs2
  20039. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20040. * ~~~
  20041. *
  20042. * **Purpose**:\n
  20043. * Do 16-bit element arithmetic right shift operations simultaneously. The shift amount is a variable from a GPR.
  20044. *
  20045. * **Description**:\n
  20046. * The 16-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out bits are filled with the sign-bit of
  20047. * the data elements. The shift amount is specified by the low-order 4-bits of the value in the Rs2 register. And the results
  20048. * are written to Rd.
  20049. *
  20050. * **Operations**:\n
  20051. * ~~~
  20052. * sa = Rs2[3:0];
  20053. * if (sa != 0)
  20054. * {
  20055. * Rd.H[x] = SE16(Rs1.H[x][15:sa]);
  20056. * } else {
  20057. * Rd = Rs1;
  20058. * }
  20059. * x=3...0
  20060. * ~~~
  20061. *
  20062. * \param [in] a unsigned long long type of value stored in a
  20063. * \param [in] b unsigned long type of value stored in b
  20064. * \return value stored in unsigned long long type
  20065. */
  20066. __STATIC_FORCEINLINE unsigned long long __RV_DSRA16(unsigned long long a, unsigned long b)
  20067. {
  20068. unsigned long long result;
  20069. __ASM volatile("dsra16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20070. return result;
  20071. }
  20072. /* ===== Inline Function End for DSRA16 ===== */
  20073. /* ===== Inline Function Start for DADD16 ===== */
  20074. /**
  20075. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20076. * \brief DADD16 (16-bit Addition)
  20077. * \details
  20078. * **Type**: SIMD
  20079. *
  20080. * **Syntax**:\n
  20081. * ~~~
  20082. * DADD16 Rd, Rs1, Rs2
  20083. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20084. * ~~~
  20085. *
  20086. * **Purpose**:\n
  20087. * Do 16-bit integer element additions simultaneously.
  20088. *
  20089. * **Description**:\n
  20090. * This instruction adds the 16-bit unsigned integer elements in Rs1 with the 16-bit unsigned integer elements in Rs2. And
  20091. * the results are written to Rd.
  20092. *
  20093. * **Operations**:\n
  20094. * ~~~
  20095. * Rd.H[x] = Rs1.H[x] + Rs2.H[x];
  20096. * x=3...0
  20097. * ~~~
  20098. *
  20099. * \param [in] a unsigned long long type of value stored in a
  20100. * \param [in] b unsigned long long type of value stored in b
  20101. * \return value stored in unsigned long long type
  20102. */
  20103. __STATIC_FORCEINLINE unsigned long long __RV_DADD16(unsigned long long a, unsigned long long b)
  20104. {
  20105. unsigned long long result;
  20106. __ASM volatile("dadd16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20107. return result;
  20108. }
  20109. /* ===== Inline Function End for DADD16 ===== */
  20110. /* ===== Inline Function Start for DADD32 ===== */
  20111. /**
  20112. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20113. * \brief DADD32 (32-bit Addition)
  20114. * \details
  20115. * **Type**: SIMD
  20116. *
  20117. * **Syntax**:\n
  20118. * ~~~
  20119. * DADD32 Rd, Rs1, Rs2
  20120. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20121. * ~~~
  20122. *
  20123. * **Purpose**:\n
  20124. * Do 32-bit integer element additions simultaneously.
  20125. *
  20126. * **Description**:\n
  20127. * This instruction adds the 32-bit integer elements in Rs1 with the 32-bit integer elements in Rs2, and then writes the 32-bit
  20128. * element results to Rd.
  20129. *
  20130. * **Operations**:\n
  20131. * ~~~
  20132. * Rd.W[x] = Rs1.W[x] + Rs2.W[x];
  20133. * x=1...0
  20134. * ~~~
  20135. *
  20136. * \param [in] a unsigned long long type of value stored in a
  20137. * \param [in] b unsigned long long type of value stored in b
  20138. * \return value stored in unsigned long long type
  20139. */
  20140. __STATIC_FORCEINLINE unsigned long long __RV_DADD32(unsigned long long a, unsigned long long b)
  20141. {
  20142. unsigned long long result;
  20143. __ASM volatile("dadd32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20144. return result;
  20145. }
  20146. /* ===== Inline Function End for DADD32 ===== */
  20147. /* ===== Inline Function Start for DSMBB16 ===== */
  20148. /**
  20149. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20150. * \brief DSMBB16 (Signed Multiply Bottom Half & Bottom Half)
  20151. * \details
  20152. * **Type**: SIMD
  20153. *
  20154. * **Syntax**:\n
  20155. * ~~~
  20156. * DSMBB16 Rd, Rs1, Rs2
  20157. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20158. * ~~~
  20159. *
  20160. * **Purpose**:\n
  20161. * Multiply the signed 16-bit content of the 32-bit elements of a register with the signed 16-bit content of the 32-bit elements
  20162. * of another register and write the result to a third register.
  20163. * * DSMBB16: W[x].bottom*W[x].bottom
  20164. *
  20165. * **Description**:\n
  20166. * For the `DSMBB16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom
  20167. * 16-bit content of the 32-bit elements of Rs2.
  20168. * The multiplication results are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed integers.
  20169. *
  20170. * **Operations**:\n
  20171. * ~~~
  20172. * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[0];
  20173. * x=1...0
  20174. * ~~~
  20175. *
  20176. * \param [in] a unsigned long long type of value stored in a
  20177. * \param [in] b unsigned long long type of value stored in b
  20178. * \return value stored in unsigned long long type
  20179. */
  20180. __STATIC_FORCEINLINE unsigned long long __RV_DSMBB16(unsigned long long a, unsigned long long b) /* pass */
  20181. {
  20182. unsigned long long result;
  20183. __ASM volatile("dsmbb16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20184. return result;
  20185. }
  20186. /* ===== Inline Function End for DSMBB16 ===== */
  20187. /* ===== Inline Function Start for DSMBT16 ===== */
  20188. /**
  20189. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20190. * \brief DSMBT16 (Signed Multiply Bottom Half & Top Half)
  20191. * \details
  20192. * **Type**: SIMD
  20193. *
  20194. * **Syntax**:\n
  20195. * ~~~
  20196. * DSMBT16 Rd, Rs1, Rs2
  20197. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20198. * ~~~
  20199. *
  20200. * **Purpose**:\n
  20201. * Multiply the signed 16-bit content of the 32-bit elements of a register with the signed 16-bit content of the 32-bit
  20202. * elements of another register and write the result to a third register.
  20203. * * DSMBT16: W[x].bottom *W[x].top
  20204. *
  20205. * **Description**:\n
  20206. * For the `DSMBT16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with the top 16-bit
  20207. * content of the 32-bit elements of Rs2.
  20208. * The multiplication results are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed integers.
  20209. *
  20210. * **Operations**:\n
  20211. * ~~~
  20212. * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[1];
  20213. * x=1...0
  20214. * ~~~
  20215. *
  20216. * \param [in] a unsigned long long type of value stored in a
  20217. * \param [in] b unsigned long long type of value stored in b
  20218. * \return value stored in unsigned long long type
  20219. */
  20220. __STATIC_FORCEINLINE unsigned long long __RV_DSMBT16(unsigned long long a, unsigned long long b) /* pass */
  20221. {
  20222. unsigned long long result;
  20223. __ASM volatile("dsmbt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20224. return result;
  20225. }
  20226. /* ===== Inline Function End for DSMBT16 ===== */
  20227. /* ===== Inline Function Start for DSMTT16 ===== */
  20228. /**
  20229. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20230. * \brief DSMTT16 (Signed Multiply Top Half & Top Half)
  20231. * \details
  20232. * **Type**: SIMD
  20233. *
  20234. * **Syntax**:\n
  20235. * ~~~
  20236. * DSMTT16 Rd, Rs1, Rs2
  20237. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20238. * ~~~
  20239. *
  20240. * **Purpose**:\n
  20241. * Multiply the signed 16-bit content of the 32-bit elements of a register with the signed 16-bit content of the 32-bit
  20242. * elements of another register and write the result to a third register.
  20243. * * DSMTT16: W[x].top * W[x].top
  20244. *
  20245. * **Description**:\n
  20246. * For the `DSMTT16` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit
  20247. * content of the 32-bit elements of Rs2.
  20248. * The multiplication results are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed integers.
  20249. *
  20250. * **Operations**:\n
  20251. * ~~~
  20252. * Rd.W[x] = Rs1.W[x].H[1] * Rs2.W[x].H[1];
  20253. * x=1...0
  20254. * ~~~
  20255. *
  20256. * \param [in] a unsigned long long type of value stored in a
  20257. * \param [in] b unsigned long long type of value stored in b
  20258. * \return value stored in unsigned long long type
  20259. */
  20260. __STATIC_FORCEINLINE unsigned long long __RV_DSMTT16(unsigned long long a, unsigned long long b)
  20261. {
  20262. unsigned long long result;
  20263. __ASM volatile("dsmtt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20264. return result;
  20265. }
  20266. /* ===== Inline Function End for DSMTT16 ===== */
  20267. /* ===== Inline Function Start for DRCRSA16 ===== */
  20268. /**
  20269. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20270. * \brief DRCRSA16 (16-bit Signed Halving Cross Subtraction & Addition)
  20271. * \details
  20272. * **Type**: SIMD
  20273. *
  20274. * **Syntax**:\n
  20275. * ~~~
  20276. * DRCRSA16 Rd, Rs1, Rs2
  20277. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20278. * ~~~
  20279. *
  20280. * **Purpose**:\n
  20281. * Do 16-bit signed integer element subtraction and 16-bit signed integer element addition in a 32-bit chunk simultaneously.
  20282. * Operands are from crossed positions in 32-bit chunks. The results are halved to avoid overflow or saturation.
  20283. *
  20284. * **Description**:\n
  20285. * This instruction subtracts the 16-bit signed integer in [31:16] of 32-bit chunks in Rs1 with the 16-bit signed integer in
  20286. * [15:0] of 32-bit chunks in Rs2, and adds the 16-bit signed integer in [31:16] of 32-bit chunks in Rs2 from the 16-bit signed
  20287. * integer in [15:0] of 32-bit chunks in Rs1. The element results are first logically right-shifted by 1 bit and then written to
  20288. * [31:16] of 32- bit chunks in Rd and [15:0] of 32-bit chunks in Rd.
  20289. *
  20290. * **Operations**:\n
  20291. * ~~~
  20292. * Rd.W[x][31:16] = (Rs1.W[x][31:16] - Rs2.W[x][15:0]) s>> 1;
  20293. * Rd.W[x][15:0] = (Rs1.W[x][15:0] + Rs2.W[x][31:16]) s>> 1;
  20294. * x=1...0
  20295. * ~~~
  20296. *
  20297. * \param [in] a unsigned long long type of value stored in a
  20298. * \param [in] b unsigned long long type of value stored in b
  20299. * \return value stored in unsigned long long type
  20300. */
  20301. __STATIC_FORCEINLINE unsigned long long __RV_DRCRSA16(unsigned long long a, unsigned long long b)
  20302. {
  20303. unsigned long long result;
  20304. __ASM volatile("drcrsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20305. return result;
  20306. }
  20307. /* ===== Inline Function End for DRCRSA16 ===== */
  20308. /* ===== Inline Function Start for DRCRSA32 ===== */
  20309. /**
  20310. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20311. * \brief DRCRSA32 (32-bit Signed Halving Cross Subtraction & Addition)
  20312. * \details
  20313. * **Type**: SIMD
  20314. *
  20315. * **Syntax**:\n
  20316. * ~~~
  20317. * DRCRSA32 Rd, Rs1, Rs2
  20318. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20319. * ~~~
  20320. *
  20321. * **Purpose**:\n
  20322. * Do 32-bit signed integer element subtraction and 32-bit signed integer element addition in a 64-bit chunk simultaneously.
  20323. * Operands are from crossed 32-bit elements. The results are halved to avoid overflow or saturation.
  20324. *
  20325. * **Description**:\n
  20326. * This instruction subtracts the 32-bit signed integer element in [63:32] of Rs1 with the 32-bit signed integer element in
  20327. * [31:0] of Rs2, and adds the 32-bit signed integer element in [63:32] of Rs2 from the 32-bit signed integer element in [31:0]
  20328. * of Rs1. The element results are first arithmetically right-shifted by 1 bit and then written to [63:32] of Rd for addition and
  20329. * [31:0] of Rd for subtraction.
  20330. *
  20331. * **Operations**:\n
  20332. * ~~~
  20333. * Rd.W[1] = (Rs1.W[1] - Rs2.W[0]) s>> 1;
  20334. * Rd.W[0] = (Rs1.W[0] + Rs2.W[1]) s>> 1;
  20335. * ~~~
  20336. *
  20337. * \param [in] a unsigned long long type of value stored in a
  20338. * \param [in] b unsigned long long type of value stored in b
  20339. * \return value stored in unsigned long long type
  20340. */
  20341. __STATIC_FORCEINLINE unsigned long long __RV_DRCRSA32(unsigned long long a, unsigned long long b)
  20342. {
  20343. unsigned long long result;
  20344. __ASM volatile("drcrsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20345. return result;
  20346. }
  20347. /* ===== Inline Function End for DRCRSA32 ===== */
  20348. /* ===== Inline Function Start for DRCRAS16 ===== */
  20349. /**
  20350. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20351. * \brief DRCRAS16 (16-bit Signed Halving Cross Addition & Subtraction)
  20352. * \details
  20353. * **Type**: SIMD
  20354. *
  20355. * **Syntax**:\n
  20356. * ~~~
  20357. * DRCRAS16 Rd, Rs1, Rs2
  20358. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20359. * ~~~
  20360. *
  20361. * **Purpose**:\n
  20362. * Do 16-bit signed integer element subtraction and 16-bit signed integer element addition in a 32-bit chunk simultaneously.
  20363. * Operands are from crossed positions in 32-bit chunks. The results are halved to avoid overflow or saturation.
  20364. *
  20365. * **Description**:\n
  20366. * This instruction adds the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs1 with the 16-bit unsigned integer in
  20367. * [15:0] of 32-bit chunks in Rs2, and subtracts the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs2 from the 16-bit
  20368. * unsigned integer in [15:0] of 32-bit chunks in Rs1. The element results are first logically right-shifted by 1 bit and then
  20369. * written to [31:16] of 32-bit chunks in Rd and [15:0] of 32-bit chunks in Rd.
  20370. *
  20371. * **Operations**:\n
  20372. * ~~~
  20373. * Rd.W[x][31:16] = (Rs1.W[x][31:16] + Rs2.W[x][15:0]) s>> 1;
  20374. * Rd.W[x][15:0] = (Rs1.W[x][15:0] - Rs2.W[x][31:16]) s>> 1;
  20375. * x=1...0
  20376. * ~~~
  20377. *
  20378. * \param [in] a unsigned long long type of value stored in a
  20379. * \param [in] b unsigned long long type of value stored in b
  20380. * \return value stored in unsigned long long type
  20381. */
  20382. __STATIC_FORCEINLINE unsigned long long __RV_DRCRAS16(unsigned long long a, unsigned long long b)
  20383. {
  20384. unsigned long long result;
  20385. __ASM volatile("drcras16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20386. return result;
  20387. }
  20388. /* ===== Inline Function End for DRCRAS16 ===== */
  20389. /* ===== Inline Function Start for DRCRAS32 ===== */
  20390. /**
  20391. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20392. * \brief DRCRAS32 (32-bit Signed Cross Addition & Subtraction)
  20393. * \details
  20394. * **Type**: SIMD
  20395. *
  20396. * **Syntax**:\n
  20397. * ~~~
  20398. * DRCRAS32 Rd, Rs1, Rs2
  20399. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20400. * ~~~
  20401. *
  20402. * **Purpose**:\n
  20403. * Do 32-bit signed integer element addition and 32-bit signed integer element subtraction in a 64-bit chunk simultaneously.
  20404. * Operands are from crossed 32-bit elements. The results are halved to avoid overflow or saturation.
  20405. *
  20406. * **Description**:\n
  20407. * This instruction adds the 32-bit signed integer element in [63:32] of Rs1 with the 32-bit signed integer element in [31:0]
  20408. * of Rs2, and subtracts the 32-bit signed integer element in [63:32] of Rs2 from the 32-bit signed integer element in [31:0]
  20409. * of Rs1. The element results are first arithmetically right-shifted by 1 bit and then written to [63:32] of Rd for addition
  20410. * and [31:0] of Rd for subtraction.
  20411. *
  20412. * **Operations**:\n
  20413. * ~~~
  20414. * Rd.W[1] = (Rs1.W[1] + Rs2.W[0]) s>> 1;
  20415. * Rd.W[0] = (Rs1.W[0] - Rs2.W[1]) s>> 1;
  20416. * ~~~
  20417. *
  20418. * \param [in] a unsigned long long type of value stored in a
  20419. * \param [in] b unsigned long long type of value stored in b
  20420. * \return value stored in unsigned long long type
  20421. */
  20422. __STATIC_FORCEINLINE unsigned long long __RV_DRCRAS32(unsigned long long a, unsigned long long b)
  20423. {
  20424. unsigned long long result;
  20425. __ASM volatile("DRCRAS32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20426. return result;
  20427. }
  20428. /* ===== Inline Function End for DRCRAS32 ===== */
  20429. /* ===== Inline Function Start for DKCRAS16 ===== */
  20430. /**
  20431. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20432. * \brief DKCRAS16 (16-bit Signed Saturating Cross Addition & Subtraction)
  20433. * \details
  20434. * **Type**: SIMD
  20435. *
  20436. * **Syntax**:\n
  20437. * ~~~
  20438. * DKCRAS16 Rd, Rs1, Rs2
  20439. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20440. * ~~~
  20441. *
  20442. * **Purpose**:\n
  20443. * Do 16-bit signed integer element saturating addition and 16-bit signed integer element saturating subtraction in a 32-bit
  20444. * chunk simultaneously. Operands are from crossed positions in 32-bit chunks.
  20445. *
  20446. * **Description**:\n
  20447. * This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1 with the 16-bit signed integer
  20448. * element in [15:0] of 32-bit chunks in Rs2; at the same time, it subtracts the 16-bit signed integer element in [31:16] of
  20449. * 32-bit chunks in Rs2 from the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs1.
  20450. * If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV
  20451. * bit is set to 1. The saturated results are written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of 32-bit chunks
  20452. * in Rd for addition.
  20453. *
  20454. * **Operations**:\n
  20455. * ~~~
  20456. * res1 = Rs1.W[x][31:16] - Rs2.W[x][15:0];
  20457. * res2 = Rs1.W[x][15:0] + Rs2.W[x][31:16];
  20458. * for (res in [res1, res2]) {
  20459. * if (res > (2^15)-1) {
  20460. * res = (2^15)-1;
  20461. * OV = 1;
  20462. * } else if (res < -2^15) {
  20463. * res = -2^15;
  20464. * OV = 1;
  20465. * }
  20466. * }
  20467. * Rd.W[x][31:16] = res1;
  20468. * Rd.W[x][15:0] = res2;
  20469. * x=1...0
  20470. * ~~~
  20471. *
  20472. * \param [in] a unsigned long long type of value stored in a
  20473. * \param [in] b unsigned long long type of value stored in b
  20474. * \return value stored in unsigned long long type
  20475. */
  20476. __STATIC_FORCEINLINE unsigned long long __RV_DKCRAS16(unsigned long long a, unsigned long long b)
  20477. {
  20478. unsigned long long result;
  20479. __ASM volatile("dkcras16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20480. return result;
  20481. }
  20482. /* ===== Inline Function End for DKCRAS16 ===== */
  20483. /* ===== Inline Function Start for DKCRSA16 ===== */
  20484. /**
  20485. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20486. * \brief DKCRSA16 (16-bit Signed Saturating Cross Subtraction & Addition)
  20487. * \details
  20488. * **Type**: SIMD
  20489. *
  20490. * **Syntax**:\n
  20491. * ~~~
  20492. * DKCRSA16 Rd, Rs1, Rs2
  20493. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20494. * ~~~
  20495. *
  20496. * **Purpose**:\n
  20497. * Do 16-bit signed integer element saturating subtraction and 16-bit signed integer element saturating addition in a 32-bit
  20498. * chunk simultaneously. Operands are from crossed positions in 32-bit chunks.
  20499. *
  20500. * **Description**:\n
  20501. * This instruction subtracts the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs2 from the 16-bit signed integer
  20502. * element in [31:16] of 32-bit chunks in Rs1; at the same time, it adds the 16-bit signed integer element in [31:16] of 32-bit
  20503. * chunks in Rs2 with the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs1.
  20504. * If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV
  20505. * bit is set to 1. The saturated results are written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit chunks
  20506. * in Rd for subtraction.
  20507. *
  20508. * **Operations**:\n
  20509. * ~~~
  20510. * res1 = Rs1.W[x][31:16] + Rs2.W[x][15:0];
  20511. * res2 = Rs1.W[x][15:0] - Rs2.W[x][31:16];
  20512. * for (res in [res1, res2]) {
  20513. * if (res > (2^15)-1) {
  20514. * res = (2^15)-1;
  20515. * OV = 1;
  20516. * } else if (res < -2^15) {
  20517. * res = -2^15;
  20518. * OV = 1;
  20519. * }
  20520. * }
  20521. * Rd.W[x][31:16] = res1;
  20522. * Rd.W[x][15:0] = res2;
  20523. * x=1...0
  20524. * ~~~
  20525. *
  20526. * \param [in] a unsigned long long type of value stored in a
  20527. * \param [in] b unsigned long long type of value stored in b
  20528. * \return value stored in unsigned long long type
  20529. */
  20530. __STATIC_FORCEINLINE unsigned long long __RV_DKCRSA16(unsigned long long a, unsigned long long b)
  20531. {
  20532. unsigned long long result;
  20533. __ASM volatile("dkcrsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20534. return result;
  20535. }
  20536. /* ===== Inline Function End for DKCRSA16 ===== */
  20537. /* ===== Inline Function Start for DRSUB16 ===== */
  20538. /**
  20539. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20540. * \brief DRSUB16 (16-bit Signed Halving Subtraction)
  20541. * \details
  20542. * **Type**: SIMD
  20543. *
  20544. * **Syntax**:\n
  20545. * ~~~
  20546. * DRSUB16 Rd, Rs1, Rs2
  20547. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20548. * ~~~
  20549. *
  20550. * **Purpose**:\n
  20551. * Do 16-bit signed integer element subtractions simultaneously. The results are halved to avoid overflow or saturation.
  20552. *
  20553. * **Description**:\n
  20554. * This instruction subtracts the 16-bit signed integer elements in Rs2 from the 16-bit signed integer elements in Rs1. The
  20555. * results are first arithmetically right-shifted by 1 bit and then written to Rd.
  20556. *
  20557. * **Operations**:\n
  20558. * ~~~
  20559. * Rd.H[x] = (Rs1.H[x] - Rs2.H[x]) s>> 1;
  20560. * x=3...0
  20561. * ~~~
  20562. *
  20563. * \param [in] a unsigned long long type of value stored in a
  20564. * \param [in] b unsigned long long type of value stored in b
  20565. * \return value stored in unsigned long long type
  20566. */
  20567. __STATIC_FORCEINLINE unsigned long long __RV_DRSUB16(unsigned long long a, unsigned long long b)
  20568. {
  20569. unsigned long long result;
  20570. __ASM volatile("drsub16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20571. return result;
  20572. }
  20573. /* ===== Inline Function End for DRSUB16 ===== */
  20574. /* ===== Inline Function Start for DSTSA32 ===== */
  20575. /**
  20576. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20577. * \brief DSTSA32 (32-bit Straight Subtraction & Addition)
  20578. * \details
  20579. * **Type**: SIMD
  20580. *
  20581. * **Syntax**:\n
  20582. * ~~~
  20583. * DSTSA32 Rd, Rs1, Rs2
  20584. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20585. * ~~~
  20586. *
  20587. * **Purpose**:\n
  20588. * Do 32-bit integer element subtraction and 32-bit integer element addition in a 64-bit chunk simultaneously. Operands are
  20589. * from corresponding 32-bit elements.
  20590. *
  20591. * **Description**:\n
  20592. * This instruction subtracts the 32-bit integer element in [63:32] of Rs2 from the 32-bit integer element in [63:32] of Rs1,
  20593. * and writes the result to [63:32] of Rd; at the same time, it adds the 32-bit integer element in [31:0] of Rs1 with the 32-bit
  20594. * integer element in [31:0] of Rs2, and writes the result to [31:0] of Rd.
  20595. *
  20596. * **Operations**:\n
  20597. * ~~~
  20598. * Rd.W[1] = Rs1.W[1] - Rs2.W[1];
  20599. * Rd.W[0] = Rs1.W[0] + Rs2.W[0];
  20600. * ~~~
  20601. *
  20602. * \param [in] a unsigned long long type of value stored in a
  20603. * \param [in] b unsigned long long type of value stored in b
  20604. * \return value stored in unsigned long long type
  20605. */
  20606. __STATIC_FORCEINLINE unsigned long long __RV_DSTSA32(unsigned long long a, unsigned long long b)
  20607. {
  20608. unsigned long long result;
  20609. __ASM volatile("dstsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20610. return result;
  20611. }
  20612. /* ===== Inline Function End for DSTSA32 ===== */
  20613. /* ===== Inline Function Start for DSTAS32 ===== */
  20614. /**
  20615. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20616. * \brief DSTAS32 (SIMD 32-bit Straight Addition & Subtraction)
  20617. * \details
  20618. * **Type**: SIMD
  20619. *
  20620. * **Syntax**:\n
  20621. * ~~~
  20622. * DSTAS32 Rd, Rs1, Rs2
  20623. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20624. * ~~~
  20625. *
  20626. * **Purpose**:\n
  20627. * Do 32-bit integer element addition and 32-bit integer element subtraction in a 64-bit chunk simultaneously. Operands are
  20628. * from corresponding 32-bit elements.
  20629. *
  20630. * **Description**:\n
  20631. * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit integer element in [63:32] of Rs2,
  20632. * and writes the result to [63:32] of Rd; at the same time, it subtracts the 32-bit integer element in [31:0] of Rs2
  20633. * from the 32-bit integer element in [31:0] of Rs1, and writes the result to [31:0] of Rd.
  20634. *
  20635. * **Operations**:\n
  20636. * ~~~
  20637. * Rd.W[1] = Rs1.W[1] + Rs2.W[1];
  20638. * Rd.W[0] = Rs1.W[0] - Rs2.W[0];
  20639. * ~~~
  20640. *
  20641. * \param [in] a unsigned long long type of value stored in a
  20642. * \param [in] b unsigned long long type of value stored in b
  20643. * \return value stored in unsigned long long type
  20644. */
  20645. __STATIC_FORCEINLINE unsigned long long __RV_DSTAS32(unsigned long long a, unsigned long long b)
  20646. {
  20647. unsigned long long result;
  20648. __ASM volatile("DSTAS32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20649. return result;
  20650. }
  20651. /* ===== Inline Function End for DSTAS32 ===== */
  20652. /* ===== Inline Function Start for DKCRSA32 ===== */
  20653. /**
  20654. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20655. * \brief DKCRSA32 (32-bit Signed Saturating Cross Subtraction & Addition)
  20656. * \details
  20657. * **Type**: SIMD
  20658. *
  20659. * **Syntax**:\n
  20660. * ~~~
  20661. * DKCRSA32 Rd, Rs1, Rs2
  20662. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20663. * ~~~
  20664. *
  20665. * **Purpose**:\n
  20666. * Do 32-bit signed integer element saturating subtraction and 32-bit signed integer element saturating addition in a 64-bit
  20667. * chunk simultaneously. Operands are from crossed 32-bit elements.
  20668. *
  20669. * **Description**:\n
  20670. * This instruction subtracts the 32-bit integer element in [31:0] of Rs2 from the 32-bit integer element in [63:32] of Rs1; at
  20671. * the same time, it adds the 32-bit integer element in [31:0] of Rs1 with the 32-bit integer element in [63:32] of Rs2. If any
  20672. * of the results are beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), they are saturated to the range and the OV bit is
  20673. * set to 1. The saturated results are written to [63:32] of Rd for subtraction and [31:0] of Rd for addition.
  20674. *
  20675. * **Operations**:\n
  20676. * ~~~
  20677. * res[1] = Rs1.W[1] - Rs2.W[0];
  20678. * res[0] = Rs1.W[0] + Rs2.W[1];
  20679. * if (res[x] > (2^31)-1) {
  20680. * res[x] = (2^31)-1;
  20681. * OV = 1;
  20682. * } else if (res[x] < -2^31) {
  20683. * res[x] = -2^31;
  20684. * OV = 1;
  20685. * }
  20686. * Rd.W[1] = res[1];
  20687. * Rd.W[0] = res[0];
  20688. * ~~~
  20689. *
  20690. * \param [in] a unsigned long long type of value stored in a
  20691. * \param [in] b unsigned long long type of value stored in b
  20692. * \return value stored in unsigned long long type
  20693. */
  20694. __STATIC_FORCEINLINE unsigned long long __RV_DKCRSA32(unsigned long long a, unsigned long long b)
  20695. {
  20696. unsigned long long result;
  20697. __ASM volatile("dkcrsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20698. return result;
  20699. }
  20700. /* ===== Inline Function End for DKCRSA32 ===== */
  20701. /* ===== Inline Function Start for DKCRAS32 ===== */
  20702. /**
  20703. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20704. * \brief DKCRAS32 (32-bit Signed Saturating Cross Addition & Subtraction)
  20705. * \details
  20706. * **Type**: SIMD
  20707. *
  20708. * **Syntax**:\n
  20709. * ~~~
  20710. * DKCRAS32 Rd, Rs1, Rs2
  20711. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20712. * ~~~
  20713. *
  20714. * **Purpose**:\n
  20715. * Do 32-bit signed integer element saturating addition and 32-bit signed integer element saturating subtraction in a 64-bit
  20716. * chunk simultaneously. Operands are from crossed 32-bit elements.
  20717. *
  20718. * **Description**:\n
  20719. * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit integer element in [31:0] of Rs2; at the
  20720. * same time, it subtracts the 32-bit integer element in [63:32] of Rs2 from the 32-bit integer element in [31:0] of Rs1. If any
  20721. * of the results are beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), they are saturated to the range and the OV bit is
  20722. * set to 1. The saturated results are written to [63:32] of Rd for addition and [31:0] of Rd for subtraction.
  20723. *
  20724. * **Operations**:\n
  20725. * ~~~
  20726. * res[1] = Rs1.W[1] + Rs2.W[0];
  20727. * res[0] = Rs1.W[0] - Rs2.W[1];
  20728. * if (res[x] > (2^31)-1) {
  20729. * res[x] = (2^31)-1;
  20730. * OV = 1;
  20731. * } else if (res[x] < -2^31) {
  20732. * res[x] = -2^31;
  20733. * OV = 1;
  20734. * }
  20735. * Rd.W[1] = res[1];
  20736. * Rd.W[0] = res[0];
  20737. * ~~~
  20738. *
  20739. * \param [in] a unsigned long long type of value stored in a
  20740. * \param [in] b unsigned long long type of value stored in b
  20741. * \return value stored in unsigned long long type
  20742. */
  20743. __STATIC_FORCEINLINE unsigned long long __RV_DKCRAS32(unsigned long long a, unsigned long long b)
  20744. {
  20745. unsigned long long result;
  20746. __ASM volatile("dkcras32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20747. return result;
  20748. }
  20749. /* ===== Inline Function End for DKCRAS32 ===== */
  20750. /* ===== Inline Function Start for DCRSA32 ===== */
  20751. /**
  20752. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20753. * \brief DCRSA32 (32-bit Cross Subtraction & Addition)
  20754. * \details
  20755. * **Type**: SIMD
  20756. *
  20757. * **Syntax**:\n
  20758. * ~~~
  20759. * DCRSA32 Rd, Rs1, Rs2
  20760. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20761. * ~~~
  20762. *
  20763. * **Purpose**:\n
  20764. * Do 32-bit integer element subtraction and 32-bit integer element addition in a 64-bit chunk simultaneously. Operands are
  20765. * from crossed 32-bit elements.
  20766. *
  20767. * **Description**:\n
  20768. * This instruction subtracts the 32-bit integer element in [31:0] of Rs2 from the 32-bit integer element in [63:32] of Rs1, and
  20769. * writes the result to [63:32] of Rd; at the same time, it adds the 32-bit integer element in [31:0] of Rs1 with the 32-bit
  20770. * integer element in [63:32] of Rs2, and writes the result to [31:0] of Rd.
  20771. *
  20772. * **Operations**:\n
  20773. * ~~~
  20774. * Rd.W[1] = Rs1.W[1] - Rs2.W[0];
  20775. * Rd.W[0] = Rs1.W[0] + Rs2.W[1];
  20776. * ~~~
  20777. *
  20778. * \param [in] a unsigned long long type of value stored in a
  20779. * \param [in] b unsigned long long type of value stored in b
  20780. * \return value stored in unsigned long long type
  20781. */
  20782. __STATIC_FORCEINLINE unsigned long long __RV_DCRSA32(unsigned long long a, unsigned long long b)
  20783. {
  20784. unsigned long long result;
  20785. __ASM volatile("dcrsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20786. return result;
  20787. }
  20788. /* ===== Inline Function End for DCRSA32 ===== */
  20789. /* ===== Inline Function Start for DCRAS32 ===== */
  20790. /**
  20791. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20792. * \brief DCRAS32 (32-bit Cross Addition & Subtraction)
  20793. * \details
  20794. * **Type**: SIMD
  20795. *
  20796. * **Syntax**:\n
  20797. * ~~~
  20798. * DCRAS32 Rd, Rs1, Rs2
  20799. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20800. * ~~~
  20801. *
  20802. * **Purpose**:\n
  20803. * Do 32-bit integer element addition and 32-bit integer element subtraction in a 64-bit chunk simultaneously. Operands are
  20804. * from crossed 32-bit elements.
  20805. *
  20806. * **Description**:\n
  20807. * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit integer element in [31:0] of Rs2, and
  20808. * writes the result to [63:32] of Rd; at the same time, it subtracts the 32-bit integer element in [63:32] of Rs2 from the 32-bit
  20809. * integer element in [31:0] of Rs1, and writes the result to [31:0] of Rd.
  20810. *
  20811. * **Operations**:\n
  20812. * ~~~
  20813. * Rd.W[1] = Rs1.W[1] + Rs2.W[0];
  20814. * Rd.W[0] = Rs1.W[0] - Rs2.W[1];
  20815. * ~~~
  20816. *
  20817. * \param [in] a unsigned long long type of value stored in a
  20818. * \param [in] b unsigned long long type of value stored in b
  20819. * \return value stored in unsigned long long type
  20820. */
  20821. __STATIC_FORCEINLINE unsigned long long __RV_DCRAS32(unsigned long long a, unsigned long long b)
  20822. {
  20823. unsigned long long result;
  20824. __ASM volatile("dcras32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20825. return result;
  20826. }
  20827. /* ===== Inline Function End for DCRAS32 ===== */
  20828. /* ===== Inline Function Start for DKSTSA16 ===== */
  20829. /**
  20830. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20831. * \brief DKSTSA16 (16-bit Signed Saturating Straight Subtraction & Addition)
  20832. * \details
  20833. * **Type**: SIMD
  20834. *
  20835. * **Syntax**:\n
  20836. * ~~~
  20837. * DKSTSA16 Rd, Rs1, Rs2
  20838. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20839. * ~~~
  20840. *
  20841. * **Purpose**:\n
  20842. * Do 16-bit signed integer element saturating subtraction and 16-bit signed integer element saturating addition in a 32-bit
  20843. * chunk simultaneously. Operands are from corresponding positions in 32-bit chunks.
  20844. *
  20845. * **Description**:\n
  20846. * This instruction subtracts the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs2 from the 16-bit signed integer
  20847. * element in [31:16] of 32-bit chunks in Rs1; at the same time, it adds the 16-bit signed integer element in [15:0] of 32-bit
  20848. * chunks in Rs2 with the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs1.
  20849. * If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV
  20850. * bit is set to 1. The saturated results are written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of 32-bit chunks
  20851. * in Rd for addition.
  20852. *
  20853. * **Operations**:\n
  20854. * ~~~
  20855. * res1 = Rs1.W[x][31:16] - Rs2.W[x][31:16];
  20856. * res2 = Rs1.W[x][15:0] + Rs2.W[x][15:0];
  20857. * for (res in [res1, res2]) {
  20858. * if (res > (2^15)-1) {
  20859. * res = (2^15)-1;
  20860. * OV = 1;
  20861. * } else if (res < -2^15) {
  20862. * res = -2^15;
  20863. * OV = 1;
  20864. * }
  20865. * }
  20866. * Rd.W[x][31:16] = res1;
  20867. * Rd.W[x][15:0] = res2;
  20868. * x=1...0
  20869. * ~~~
  20870. *
  20871. * \param [in] a unsigned long long type of value stored in a
  20872. * \param [in] b unsigned long long type of value stored in b
  20873. * \return value stored in unsigned long long type
  20874. */
  20875. __STATIC_FORCEINLINE unsigned long long __RV_DKSTSA16(unsigned long long a, unsigned long long b)
  20876. {
  20877. unsigned long long result;
  20878. __ASM volatile("dkstsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20879. return result;
  20880. }
  20881. /* ===== Inline Function End for DKSTSA16 ===== */
  20882. /* ===== Inline Function Start for DKSTAS16 ===== */
  20883. /**
  20884. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20885. * \brief DKSTAS16 (16-bit Signed Saturating Straight Addition & Subtraction)
  20886. * \details
  20887. * **Type**: SIMD
  20888. *
  20889. * **Syntax**:\n
  20890. * ~~~
  20891. * DKSTAS16 Rd, Rs1, Rs2
  20892. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20893. * ~~~
  20894. *
  20895. * **Purpose**:\n
  20896. * Do 16-bit signed integer element saturating addition and 16-bit signed integer element saturating subtraction in a 32-bit
  20897. * chunk simultaneously. Operands are from corresponding positions in 32-bit chunks.
  20898. *
  20899. * **Description**:\n
  20900. * This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1 with the 16-bit signed integer
  20901. * element in [31:16] of 32-bit chunks in Rs2; at the same time, it subtracts the 16-bit signed integer element in [15:0] of
  20902. * 32-bit chunks in Rs2 from the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs1.
  20903. * If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV
  20904. * bit is set to 1. The saturated results are written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit chunks
  20905. * in Rd for subtraction.
  20906. *
  20907. * **Operations**:\n
  20908. * ~~~
  20909. * res1 = Rs1.W[x][31:16] + Rs2.W[x][31:16];
  20910. * res2 = Rs1.W[x][15:0] - Rs2.W[x][15:0];
  20911. * for (res in [res1, res2]) {
  20912. * if (res > (2^15)-1) {
  20913. * res = (2^15)-1;
  20914. * OV = 1;
  20915. * } else if (res < -2^15) {
  20916. * res = -2^15;
  20917. * OV = 1;
  20918. * }
  20919. * }
  20920. * Rd.W[x][31:16] = res1;
  20921. * Rd.W[x][15:0] = res2;
  20922. * x=1...0
  20923. * ~~~
  20924. *
  20925. * \param [in] a unsigned long long type of value stored in a
  20926. * \param [in] b unsigned long long type of value stored in b
  20927. * \return value stored in unsigned long long type
  20928. */
  20929. __STATIC_FORCEINLINE unsigned long long __RV_DKSTAS16(unsigned long long a, unsigned long long b)
  20930. {
  20931. unsigned long long result;
  20932. __ASM volatile("dkstas16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20933. return result;
  20934. }
  20935. /* ===== Inline Function End for DKSTAS16 ===== */
  20936. /* ===== Inline Function Start for DSCLIP8 ===== */
  20937. /**
  20938. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20939. * \brief DSCLIP8 (8-bit Signed Saturation and Clip)
  20940. * \details
  20941. * **Type**: SIMD
  20942. *
  20943. * **Syntax**:\n
  20944. * ~~~
  20945. * DSCLIP8 Rd, Rs1, imm3u[2:0]
  20946. * # Rd, Rs1 are all even/odd pair of registers
  20947. * ~~~
  20948. *
  20949. * **Purpose**:\n
  20950. * Limit the 8-bit signed integer elements of a register into a signed range simultaneously.
  20951. *
  20952. * **Description**:\n
  20953. * This instruction limits the 8-bit signed integer elements stored in Rs1 into a signed integer range between -2^imm3u and
  20954. * 2^imm3u-1, and writes the limited results to Rd. For example, if imm3u is 3, the 8-bit input values should be saturated
  20955. * between 7 and -8. If saturation is performed, set OV bit to 1.
  20956. *
  20957. * **Operations**:\n
  20958. * ~~~
  20959. * src = Rs1.B[x];
  20960. * if (src > (2^imm3u)-1) {
  20961. * src = (2^imm3u)-1;
  20962. * OV = 1;
  20963. * } else if (src < -2^imm3u) {
  20964. * src = -2^imm3u;
  20965. * OV = 1;
  20966. * }
  20967. * Rd.B[x] = src
  20968. * x=7...0
  20969. * ~~~
  20970. *
  20971. * \param [in] a unsigned long long type of value stored in a
  20972. * \param [in] b unsigned immediate imm3u (compile-time constant, 0..7) selecting the clip range [-2^b, 2^b-1]
  20973. * \return value stored in unsigned long long type
  20974. */
  /*
   * Statement-expression wrapper around `dsclip8`.
   * - `a` is converted to unsigned long long and evaluated exactly once,
   *   before the output temporary is declared; together with the
   *   instruction-specific temporary names this keeps caller expressions that
   *   mention `result` or `__a` from binding to the macro's own locals.
   * - `b` is passed with the "K" (immediate) constraint, so it must be a
   *   compile-time constant.
   * The value of the expression is the register written by the instruction.
   */
  #define __RV_DSCLIP8(a, b) \
  ({ \
      unsigned long long __dsclip8_rs1 = (unsigned long long)(a); \
      unsigned long long __dsclip8_rd; \
      __ASM volatile("dsclip8 %0, %1, %2" \
                     : "=r"(__dsclip8_rd) \
                     : "r"(__dsclip8_rs1), "K"(b)); \
      __dsclip8_rd; \
  })
  20982. /* ===== Inline Function End for DSCLIP8 ===== */
  20983. /* ===== Inline Function Start for DSCLIP16 ===== */
  20984. /**
  20985. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20986. * \brief DSCLIP16 (16-bit Signed Saturation and Clip)
  20987. * \details
  20988. * **Type**: SIMD
  20989. *
  20990. * **Syntax**:\n
  20991. * ~~~
  20992. * DSCLIP16 Rd, Rs1, imm4u[3:0]
  20993. * # Rd, Rs1 are all even/odd pair of registers
  20994. * ~~~
  20995. *
  20996. * **Purpose**:\n
  20997. * Limit the 16-bit signed integer elements of a register into a signed range simultaneously.
  20998. *
  20999. * **Description**:\n
  21000. * This instruction limits the 16-bit signed integer elements stored in Rs1 into a signed integer range between -2^imm4u and
  21001. * 2^imm4u-1, and writes the limited results to Rd. For example, if imm4u is 3, the 16-bit input values should be saturated
  21002. * between 7 and -8. If saturation is performed, set OV bit to 1.
  21003. *
  21004. * **Operations**:\n
  21005. * ~~~
  21006. * src = Rs1.H[x];
  21007. * if (src > (2^imm4u)-1) {
  21008. * src = (2^imm4u)-1;
  21009. * OV = 1;
  21010. * } else if (src < -2^imm4u) {
  21011. * src = -2^imm4u;
  21012. * OV = 1;
  21013. * }
  21014. * Rd.H[x] = src
  21015. * x=3...0
  21016. * ~~~
  21017. *
  21018. * \param [in] a unsigned long long type of value stored in a
  21019. * \param [in] b unsigned immediate imm4u (compile-time constant, 0..15) selecting the clip range [-2^b, 2^b-1]
  21020. * \return value stored in unsigned long long type
  21021. */
  /*
   * Statement-expression wrapper around `dsclip16`.
   * - `a` is converted to unsigned long long and evaluated exactly once,
   *   before the output temporary is declared; together with the
   *   instruction-specific temporary names this keeps caller expressions that
   *   mention `result` or `__a` from binding to the macro's own locals.
   * - `b` is passed with the "K" (immediate) constraint, so it must be a
   *   compile-time constant.
   * The value of the expression is the register written by the instruction.
   */
  #define __RV_DSCLIP16(a, b) \
  ({ \
      unsigned long long __dsclip16_rs1 = (unsigned long long)(a); \
      unsigned long long __dsclip16_rd; \
      __ASM volatile("dsclip16 %0, %1, %2" \
                     : "=r"(__dsclip16_rd) \
                     : "r"(__dsclip16_rs1), "K"(b)); \
      __dsclip16_rd; \
  })
  21029. /* ===== Inline Function End for DSCLIP16 ===== */
  21030. /* ===== Inline Function Start for DSCLIP32 ===== */
  21031. /**
  21032. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  21033. * \brief DSCLIP32 (32-bit Signed Saturation and Clip)
  21034. * \details
  21035. * **Type**: SIMD
  21036. *
  21037. * **Syntax**:\n
  21038. * ~~~
  21039. * DSCLIP32 Rd, Rs1, imm5u[4:0]
  21040. * # Rd, Rs1 are all even/odd pair of registers
  21041. * ~~~
  21042. *
  21043. * **Purpose**:\n
  21044. * Limit the 32-bit signed integer elements of a register into a signed range simultaneously.
  21045. *
  21046. * **Description**:\n
  21047. * This instruction limits the 32-bit signed integer elements stored in Rs1 into a signed integer range between -2^imm5u and
  21048. * 2^imm5u-1, and writes the limited results to Rd. For example, if imm5u is 3, the 32-bit input values should be saturated
  21049. * between 7 and -8. If saturation is performed, set OV bit to 1.
  21050. *
  21051. * **Operations**:\n
  21052. * ~~~
  21053. * src = Rs1.W[x];
  21054. * if (src > (2^imm5u)-1) {
  21055. * src = (2^imm5u)-1;
  21056. * OV = 1;
  21057. * } else if (src < -2^imm5u) {
  21058. * src = -2^imm5u;
  21059. * OV = 1;
  21060. * }
  21061. * Rd.W[x] = src
  21062. * x=1...0
  21063. * ~~~
  21064. *
  21065. * \param [in] a unsigned long long type of value stored in a
  21066. * \param [in] b unsigned immediate imm5u (compile-time constant, 0..31) selecting the clip range [-2^b, 2^b-1]
  21067. * \return value stored in unsigned long long type
  21068. */
  /*
   * Statement-expression wrapper around `dsclip32`.
   * - `a` is converted to unsigned long long and evaluated exactly once,
   *   before the output temporary is declared; together with the
   *   instruction-specific temporary names this keeps caller expressions that
   *   mention `result` or `__a` from binding to the macro's own locals.
   * - `b` is passed with the "K" (immediate) constraint, so it must be a
   *   compile-time constant.
   * The value of the expression is the register written by the instruction.
   */
  #define __RV_DSCLIP32(a, b) \
  ({ \
      unsigned long long __dsclip32_rs1 = (unsigned long long)(a); \
      unsigned long long __dsclip32_rd; \
      __ASM volatile("dsclip32 %0, %1, %2" \
                     : "=r"(__dsclip32_rd) \
                     : "r"(__dsclip32_rs1), "K"(b)); \
      __dsclip32_rd; \
  })
  21076. /* ===== Inline Function End for DSCLIP32 ===== */
  21077. /* ===== Inline Function Start for DRSUB32 ===== */
  21078. /**
  21079. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  21080. * \brief DRSUB32 (32-bit Signed Halving Subtraction)
  21081. * \details
  21082. * **Type**: SIMD
  21083. *
  21084. * **Syntax**:\n
  21085. * ~~~
  21086. * DRSUB32 Rd, Rs1, Rs2
  21087. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  21088. * ~~~
  21089. *
  21090. * **Purpose**:\n
  21091. * Do 32-bit signed integer element subtractions simultaneously. The results are halved to avoid overflow or saturation.
  21092. *
  21093. * **Description**:\n
  21094. * This instruction subtracts the 32-bit signed integer elements in Rs2 from the 32-bit signed integer elements in Rs1. The
  21095. * results are first arithmetically right-shifted by 1 bit and then written to Rd.
  21096. *
  21097. * **Operations**:\n
  21098. * ~~~
  21099. * Rd.W[x] = (Rs1.W[x] - Rs2.W[x]) s>> 1;
  21100. * x=1...0
  21101. * ~~~
  21102. *
  21103. * \param [in] a unsigned long long type of value stored in a
  21104. * \param [in] b unsigned long long type of value stored in b
  21105. * \return value stored in unsigned long long type
  21106. */
  21107. __STATIC_FORCEINLINE unsigned long long __RV_DRSUB32(unsigned long long a, unsigned long long b)
  21108. {
  21109. unsigned long long result;
  21110. __ASM volatile("drsub32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  21111. return result;
  21112. }
  21113. /* ===== Inline Function End for DRSUB32 ===== */
  21114. /* ===== Inline Function Start for DPACK32 ===== */
  21115. /**
  21116. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  21117. * \brief DPACK32 (SIMD Pack Two 32-bit Data To 64-bit)
  21118. * \details
  21119. * **Type**: SIMD
  21120. *
  21121. * **Syntax**:\n
  21122. * ~~~
  21123. * DPACK32 Rd, Rs1, Rs2
  21124. * # Rd is even/odd pair of register
  21125. * ~~~
  21126. *
  21127. * **Purpose**:\n
  21128. * Pack two 32-bit values from two registers into one 64-bit value.
  21129. *
  21130. * **Description**:\n
  21131. * This instruction moves 32-bit Rs1 to Rd.W[1] and moves 32-bit Rs2 to Rd.W[0].
  21132. *
  21133. * **Operations**:\n
  21134. * ~~~
  21135. * Rd = CONCAT(Rs1.W , Rs2.W);
  21136. * ~~~
  21137. *
  21138. * \param [in] a signed long type of value stored in a
  21139. * \param [in] b signed long type of value stored in b
  21140. * \return value stored in unsigned long long type
  21141. */
  21142. __STATIC_FORCEINLINE unsigned long long __RV_DPACK32(signed long a, signed long b)
  21143. {
  21144. unsigned long long result;
  21145. __ASM volatile("dpack32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  21146. return result;
  21147. }
  21148. /* ===== Inline Function End for DPACK32 ===== */
  21149. /* ===== Inline Function Start for DSUNPKD810 ===== */
  21150. /**
  21151. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  21152. * \brief DSUNPKD810 (Signed Unpacking Bytes 1 & 0)
  21153. * \details
  21154. * **Type**: SIMD
  21155. *
  21156. * **Syntax**:\n
  21157. * ~~~
  21158. * DSUNPKD810 Rd, Rs1
  21159. * # Rd, Rs1 are all even/odd pair of registers
  21160. * ~~~
  21161. *
  21162. * **Purpose**:\n
  21163. * Unpack byte 1 and byte 0 of 32-bit chunks in a register into two 16-bit signed halfwords of 32-bit chunks in a register.
  21164. *
  21165. * **Description**:\n
  21166. * For the `DSUNPKD810` instruction, it unpacks byte 1 and byte 0 of 32-bit chunks in Rs1 into two 16-bit signed halfwords
  21167. * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
  21168. *
  21169. * **Operations**:\n
  21170. * ~~~
  21171. * Rd.W[m].H[1] = SE16(Rs1.W[m].B[1])
  21172. * Rd.W[m].H[0] = SE16(Rs1.W[m].B[0])
  21173. * ~~~
  21174. *
  21175. * \param [in] a unsigned long long type of value stored in a
  21176. * \return value stored in unsigned long long type
  21177. */
  21178. __STATIC_FORCEINLINE unsigned long long __RV_DSUNPKD810(unsigned long long a)
  21179. {
  21180. unsigned long long result;
  21181. __ASM volatile("dsunpkd810 %0, %1" : "=r"(result) : "r"(a));
  21182. return result;
  21183. }
  21184. /* ===== Inline Function End for DSUNPKD810 ===== */
  21185. /* ===== Inline Function Start for DSUNPKD820 ===== */
  21186. /**
  21187. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  21188. * \brief DSUNPKD820 (Signed Unpacking Bytes 2 & 0)
  21189. * \details
  21190. * **Type**: SIMD
  21191. *
  21192. * **Syntax**:\n
  21193. * ~~~
  21194. * DSUNPKD820 Rd, Rs1
  21195. * # Rd, Rs1 are all even/odd pair of registers
  21196. * ~~~
  21197. *
  21198. * **Purpose**:\n
  21199. * Unpack byte 2 and byte 0 of 32-bit chunks in a register into two 16-bit signed halfwords of 32-bit chunks in a register.
  21200. *
  21201. * **Description**:\n
  21202. * For the `DSUNPKD820` instruction, it unpacks byte 2 and byte 0 of 32-bit chunks in Rs1 into two 16-bit signed halfwords
  21203. * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
  21204. *
  21205. * **Operations**:\n
  21206. * ~~~
  21207. * Rd.W[m].H[1] = SE16(Rs1.W[m].B[2])
  21208. * Rd.W[m].H[0] = SE16(Rs1.W[m].B[0])
  21209. * ~~~
  21210. *
  21211. * \param [in] a unsigned long long type of value stored in a
  21212. * \return value stored in unsigned long long type
  21213. */
  21214. __STATIC_FORCEINLINE unsigned long long __RV_DSUNPKD820(unsigned long long a)
  21215. {
  21216. unsigned long long result;
  21217. __ASM volatile("dsunpkd820 %0, %1" : "=r"(result) : "r"(a));
  21218. return result;
  21219. }
  21220. /* ===== Inline Function End for DSUNPKD820 ===== */
  21221. /* ===== Inline Function Start for DSUNPKD830 ===== */
  21222. /**
  21223. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  21224. * \brief DSUNPKD830 (Signed Unpacking Bytes 3 & 0)
  21225. * \details
  21226. * **Type**: SIMD
  21227. *
  21228. * **Syntax**:\n
  21229. * ~~~
  21230. * DSUNPKD830 Rd, Rs1
  21231. * # Rd, Rs1 are all even/odd pair of registers
  21232. * ~~~
  21233. *
  21234. * **Purpose**:\n
  21235. * Unpack byte 3 and byte 0 of 32-bit chunks in a register into two 16-bit signed halfwords of 32-bit chunks in a register.
  21236. *
  21237. * **Description**:\n
  21238. * For the `DSUNPKD830` instruction, it unpacks byte 3 and byte 0 of 32-bit chunks in Rs1 into two 16-bit signed halfwords
  21239. * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
  21240. *
  21241. * **Operations**:\n
  21242. * ~~~
  21243. * Rd.W[m].H[1] = SE16(Rs1.W[m].B[3])
  21244. * Rd.W[m].H[0] = SE16(Rs1.W[m].B[0])
  21245. * ~~~
  21246. *
  21247. * \param [in] a unsigned long long type of value stored in a
  21248. * \return value stored in unsigned long long type
  21249. */
  21250. __STATIC_FORCEINLINE unsigned long long __RV_DSUNPKD830(unsigned long long a)
  21251. {
  21252. unsigned long long result;
  21253. __ASM volatile("dsunpkd830 %0, %1" : "=r"(result) : "r"(a));
  21254. return result;
  21255. }
  21256. /* ===== Inline Function End for DSUNPKD830 ===== */
  21257. /* ===== Inline Function Start for DSUNPKD831 ===== */
  21258. /**
  21259. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  21260. * \brief DSUNPKD831 (Signed Unpacking Bytes 3 & 1)
  21261. * \details
  21262. * **Type**: SIMD
  21263. *
  21264. * **Syntax**:\n
  21265. * ~~~
  21266. * DSUNPKD831 Rd, Rs1
  21267. * # Rd, Rs1 are all even/odd pair of registers
  21268. * ~~~
  21269. *
  21270. * **Purpose**:\n
  21271. * Unpack byte 3 and byte 1 of 32-bit chunks in a register into two 16-bit signed halfwords of 32-bit chunks in a register.
  21272. *
  21273. * **Description**:\n
  21274. * For the `DSUNPKD831` instruction, it unpacks byte 3 and byte 1 of 32-bit chunks in Rs1 into two 16-bit signed halfwords
  21275. * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
  21276. *
  21277. * **Operations**:\n
  21278. * ~~~
  21279. * Rd.W[m].H[1] = SE16(Rs1.W[m].B[3])
  21280. * Rd.W[m].H[0] = SE16(Rs1.W[m].B[1])
  21281. * ~~~
  21282. *
  21283. * \param [in] a unsigned long long type of value stored in a
  21284. * \return value stored in unsigned long long type
  21285. */
  21286. __STATIC_FORCEINLINE unsigned long long __RV_DSUNPKD831(unsigned long long a)
  21287. {
  21288. unsigned long long result;
  21289. __ASM volatile("dsunpkd831 %0, %1" : "=r"(result) : "r"(a));
  21290. return result;
  21291. }
  21292. /* ===== Inline Function End for DSUNPKD831 ===== */
  21293. /* ===== Inline Function Start for DSUNPKD832 ===== */
  21294. /**
  21295. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  21296. * \brief DSUNPKD832 (Signed Unpacking Bytes 3 & 2)
  21297. * \details
  21298. * **Type**: SIMD
  21299. *
  21300. * **Syntax**:\n
  21301. * ~~~
  21302. * DSUNPKD832 Rd, Rs1
  21303. * # Rd, Rs1 are all even/odd pair of registers
  21304. * ~~~
  21305. *
  21306. * **Purpose**:\n
  21307. * Unpack byte 3 and byte 2 of 32-bit chunks in a register into two 16-bit signed halfwords of 32-bit chunks in a register.
  21308. *
  21309. * **Description**:\n
  21310. * For the `DSUNPKD832` instruction, it unpacks byte 3 and byte 2 of 32-bit chunks in Rs1 into two 16-bit signed halfwords
  21311. * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
  21312. *
  21313. * **Operations**:\n
  21314. * ~~~
  21315. * Rd.W[m].H[1] = SE16(Rs1.W[m].B[3])
  21316. * Rd.W[m].H[0] = SE16(Rs1.W[m].B[2])
  21317. * ~~~
  21318. *
  21319. * \param [in] a unsigned long long type of value stored in a
  21320. * \return value stored in unsigned long long type
  21321. */
  21322. __STATIC_FORCEINLINE unsigned long long __RV_DSUNPKD832(unsigned long long a)
  21323. {
  21324. unsigned long long result;
  21325. __ASM volatile("dsunpkd832 %0, %1" : "=r"(result) : "r"(a));
  21326. return result;
  21327. }
  21328. /* ===== Inline Function End for DSUNPKD832 ===== */
  21329. /* ===== Inline Function Start for DZUNPKD810 ===== */
  21330. /**
  21331. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  21332. * \brief DZUNPKD810 (UnSigned Unpacking Bytes 1 & 0)
  21333. * \details
  21334. * **Type**: SIMD
  21335. *
  21336. * **Syntax**:\n
  21337. * ~~~
  21338. * DZUNPKD810 Rd, Rs1
  21339. * # Rd, Rs1 are all even/odd pair of registers
  21340. * ~~~
  21341. *
  21342. * **Purpose**:\n
  21343. * Unpack byte 1 and byte 0 of 32-bit chunks in a register into two 16-bit unsigned halfwords of 32-bit chunks in a register.
  21344. *
  21345. * **Description**:\n
  21346. * For the `DZUNPKD810` instruction, it unpacks byte 1 and byte 0 of 32-bit chunks in Rs1 into two 16-bit unsigned halfwords
  21347. * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
  21348. *
  21349. * **Operations**:\n
  21350. * ~~~
  21351. * Rd.W[m].H[1] = SE16(Rs1.W[m].B[1])
  21352. * Rd.W[m].H[0] = SE16(Rs1.W[m].B[0])
  21353. * ~~~
  21354. *
  21355. * \param [in] a unsigned long long type of value stored in a
  21356. * \return value stored in unsigned long long type
  21357. */
  21358. __STATIC_FORCEINLINE unsigned long long __RV_DZUNPKD810(unsigned long long a)
  21359. {
  21360. unsigned long long result;
  21361. __ASM volatile("dzunpkd810 %0, %1" : "=r"(result) : "r"(a));
  21362. return result;
  21363. }
  21364. /* ===== Inline Function End for DZUNPKD810 ===== */
  21365. /* ===== Inline Function Start for DZUNPKD820 ===== */
  21366. /**
  21367. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  21368. * \brief DZUNPKD820 (UnSigned Unpacking Bytes 2 & 0)
  21369. * \details
  21370. * **Type**: SIMD
  21371. *
  21372. * **Syntax**:\n
  21373. * ~~~
  21374. * DZUNPKD820 Rd, Rs1
  21375. * # Rd, Rs1 are all even/odd pair of registers
  21376. * ~~~
  21377. *
  21378. * **Purpose**:\n
  21379. * Unpack byte 2 and byte 0 of 32-bit chunks in a register into two 16-bit unsigned halfwords of 32-bit chunks in a register.
  21380. *
  21381. * **Description**:\n
  21382. * For the `DZUNPKD820` instruction, it unpacks byte 2 and byte 0 of 32-bit chunks in Rs1 into two 16-bit unsigned halfwords
  21383. * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
  21384. *
  21385. * **Operations**:\n
  21386. * ~~~
  21387. * Rd.W[m].H[1] = SE16(Rs1.W[m].B[2])
  21388. * Rd.W[m].H[0] = SE16(Rs1.W[m].B[0])
  21389. * ~~~
  21390. *
  21391. * \param [in] a unsigned long long type of value stored in a
  21392. * \return value stored in unsigned long long type
  21393. */
  21394. __STATIC_FORCEINLINE unsigned long long __RV_DZUNPKD820(unsigned long long a)
  21395. {
  21396. unsigned long long result;
  21397. __ASM volatile("dzunpkd820 %0, %1" : "=r"(result) : "r"(a));
  21398. return result;
  21399. }
  21400. /* ===== Inline Function End for DZUNPKD820 ===== */
  21401. /* ===== Inline Function Start for DZUNPKD830 ===== */
  21402. /**
  21403. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  21404. * \brief DZUNPKD830 (UnSigned Unpacking Bytes 3 & 0)
  21405. * \details
  21406. * **Type**: SIMD
  21407. *
  21408. * **Syntax**:\n
  21409. * ~~~
  21410. * DZUNPKD830 Rd, Rs1
  21411. * # Rd, Rs1 are all even/odd pair of registers
  21412. * ~~~
  21413. *
  21414. * **Purpose**:\n
  21415. * Unpack byte 3 and byte 0 of 32-bit chunks in a register into two 16-bit unsigned halfwords of 32-bit chunks in a register.
  21416. *
  21417. * **Description**:\n
  21418. * For the `DZUNPKD830` instruction, it unpacks byte 3 and byte 0 of 32-bit chunks in Rs1 into two 16-bit unsigned halfwords
  21419. * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
  21420. *
  21421. * **Operations**:\n
  21422. * ~~~
  21423. * Rd.W[m].H[1] = SE16(Rs1.W[m].B[3])
  21424. * Rd.W[m].H[0] = SE16(Rs1.W[m].B[0])
  21425. * ~~~
  21426. *
  21427. * \param [in] a unsigned long long type of value stored in a
  21428. * \return value stored in unsigned long long type
  21429. */
  21430. __STATIC_FORCEINLINE unsigned long long __RV_DZUNPKD830(unsigned long long a)
  21431. {
  21432. unsigned long long result;
  21433. __ASM volatile("dzunpkd830 %0, %1" : "=r"(result) : "r"(a));
  21434. return result;
  21435. }
  21436. /* ===== Inline Function End for DZUNPKD830 ===== */
  21437. /* ===== Inline Function Start for DZUNPKD831 ===== */
  21438. /**
  21439. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  21440. * \brief DZUNPKD831 (UnSigned Unpacking Bytes 3 & 1)
  21441. * \details
  21442. * **Type**: SIMD
  21443. *
  21444. * **Syntax**:\n
  21445. * ~~~
  21446. * DZUNPKD831 Rd, Rs1
  21447. * # Rd, Rs1 are all even/odd pair of registers
  21448. * ~~~
  21449. *
  21450. * **Purpose**:\n
  21451. * Unpack byte 3 and byte 1 of 32-bit chunks in a register into two 16-bit unsigned halfwords of 32-bit chunks in a register.
  21452. *
  21453. * **Description**:\n
  21454. * For the `DZUNPKD831` instruction, it unpacks byte 3 and byte 1 of 32-bit chunks in Rs1 into two 16-bit unsigned halfwords
  21455. * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
  21456. *
  21457. * **Operations**:\n
  21458. * ~~~
  21459. * Rd.W[m].H[1] = SE16(Rs1.W[m].B[3])
  21460. * Rd.W[m].H[0] = SE16(Rs1.W[m].B[1])
  21461. * ~~~
  21462. *
  21463. * \param [in] a unsigned long long type of value stored in a
  21464. * \return value stored in unsigned long long type
  21465. */
  21466. __STATIC_FORCEINLINE unsigned long long __RV_DZUNPKD831(unsigned long long a)
  21467. {
  21468. unsigned long long result;
  21469. __ASM volatile("dzunpkd831 %0, %1" : "=r"(result) : "r"(a));
  21470. return result;
  21471. }
  21472. /* ===== Inline Function End for DZUNPKD831 ===== */
  21473. /* ===== Inline Function Start for DZUNPKD832 ===== */
  21474. /**
  21475. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  21476. * \brief DZUNPKD832 (UnSigned Unpacking Bytes 3 & 2)
  21477. * \details
  21478. * **Type**: SIMD
  21479. *
  21480. * **Syntax**:\n
  21481. * ~~~
  21482. * DZUNPKD832 Rd, Rs1
  21483. * # Rd, Rs1 are all even/odd pair of registers
  21484. * ~~~
  21485. *
  21486. * **Purpose**:\n
  21487. * Unpack byte 3 and byte 2 of 32-bit chunks in a register into two 16-bit unsigned halfwords of 32-bit chunks in a register.
  21488. *
  21489. * **Description**:\n
  21490. * For the `DZUNPKD832` instruction, it unpacks byte 3 and byte 2 of 32-bit chunks in Rs1 into two 16-bit unsigned halfwords
  21491. * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
  21492. *
  21493. * **Operations**:\n
  21494. * ~~~
  21495. * Rd.W[m].H[1] = SE16(Rs1.W[m].B[3])
  21496. * Rd.W[m].H[0] = SE16(Rs1.W[m].B[2])
  21497. * ~~~
  21498. *
  21499. * \param [in] a unsigned long long type of value stored in a
  21500. * \return value stored in unsigned long long type
  21501. */
  21502. __STATIC_FORCEINLINE unsigned long long __RV_DZUNPKD832(unsigned long long a)
  21503. {
  21504. unsigned long long result;
  21505. __ASM volatile("dzunpkd832 %0, %1" : "=r"(result) : "r"(a));
  21506. return result;
  21507. }
  21508. /* ===== Inline Function End for DZUNPKD832 ===== */
  21509. /* ===== Inline Function Start for DKMMAC ===== */
  21510. /**
  21511. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  21512. * \brief DKMMAC (64-bit MSW 32x32 Signed Multiply and Saturating Add)
  21513. * \details
  21514. * **Type**: SIMD
  21515. *
  21516. * **Syntax**:\n
  21517. * ~~~
  21518. * DKMMAC Rd, Rs1, Rs2
  21519. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  21520. * ~~~
  21521. *
  21522. * **Purpose**:\n
  21523. * Do MSW 32x32 element signed multiplications and saturating addition simultaneously. The results are written into Rd.
  21524. *
  21525. * **Description**:\n
  21526. * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2
  21527. * and adds the most significant 32-bit multiplication results with the signed 32-bit elements of Rd. If
  21528. * the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range
  21529. * and the OV bit is set to 1. The results after saturation are written to Rd. The .u form of the
  21530. * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by
  21531. * adding a 1 to bit 31 of the results.
  21532. *
  21533. * **Operations**:\n
  21534. * ~~~
  21535. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
  21536. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
  21537. * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
  21538. * res = sat.q31(dop + (aop s* bop)[63:32]);
  21539. * }
  21540. * Rd = concat(rest, resb);
  21541. * x=0
  21542. * ~~~
  21543. *
  21544. * \param [in] t unsigned long long type of value stored in t
  21545. * \param [in] a unsigned long long type of value stored in a
  21546. * \param [in] b unsigned long long type of value stored in b
  21547. * \return value stored in unsigned long long type
  21548. */
  21549. __STATIC_FORCEINLINE unsigned long long __RV_DKMMAC(unsigned long long t, unsigned long long a, unsigned long long b)
  21550. {
  21551. __ASM volatile("dkmmac %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  21552. return t;
  21553. }
  21554. /* ===== Inline Function End for DKMMAC ===== */
  21555. /* ===== Inline Function Start for DKMMAC.u ===== */
  21556. /**
  21557. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  21558. * \brief DKMMAC.u (64-bit MSW 32x32 Unsigned Multiply and Saturating Add)
  21559. * \details
  21560. * **Type**: SIMD
  21561. *
  21562. * **Syntax**:\n
  21563. * ~~~
  21564. * DKMMAC.u Rd, Rs1, Rs2
  21565. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  21566. * ~~~
  21567. *
  21568. * **Purpose**:\n
  21569. * Do MSW 32x32 element unsigned multiplications and saturating addition simultaneously. The results are written into Rd.
  21570. *
  21571. * **Description**:\n
  21572. * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2
  21573. * and adds the most significant 32-bit multiplication results with the signed 32-bit elements of Rd. If
  21574. * the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range
  21575. * and the OV bit is set to 1. The results after saturation are written to Rd. The .u form of the
  21576. * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by
  21577. * adding a 1 to bit 31 of the results.
  21578. *
  21579. * **Operations**:\n
  21580. * ~~~
  21581. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
  21582. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
  21583. * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
  21584. * res = sat.q31(dop + RUND(aop u* bop)[63:32]);
  21585. * }
  21586. * Rd = concat(rest, resb);
  21587. * x=0
  21588. * ~~~
  21589. *
  21590. * \param [in] t unsigned long long type of value stored in t
  21591. * \param [in] a unsigned long long type of value stored in a
  21592. * \param [in] b unsigned long long type of value stored in b
  21593. * \return value stored in unsigned long long type
  21594. */
  21595. __STATIC_FORCEINLINE unsigned long long __RV_DKMMAC_U(unsigned long long t, unsigned long long a, unsigned long long b)
  21596. {
  21597. __ASM volatile("dkmmac.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  21598. return t;
  21599. }
  21600. /* ===== Inline Function End for DKMMAC.u ===== */
  21601. /* ===== Inline Function Start for DKMMSB ===== */
  21602. /**
  21603. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  21604. * \brief DKMMSB (64-bit MSW 32x32 Signed Multiply and Saturating Sub)
  21605. * \details
  21606. * **Type**: SIMD
  21607. *
  21608. * **Syntax**:\n
  21609. * ~~~
  21610. * DKMMSB Rd, Rs1, Rs2
  21611. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  21612. * ~~~
  21613. *
  21614. * **Purpose**:\n
  21615. * Do MSW 32x32 element signed multiplications and saturating subtraction simultaneously. The results are written into Rd.
  21616. *
  21617. * **Description**:\n
  21618. * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2
  21619. * and subtracts the most significant 32-bit multiplication results from the signed 32-bit elements of
  21620. * Rd. If the subtraction result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the
  21621. * range and the OV bit is set to 1. The results after saturation are written to Rd. The .u form of the
  21622. * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by
  21623. * adding a 1 to bit 31 of the results.
  21624. *
  21625. * **Operations**:\n
  21626. * ~~~
  21627. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
  21628. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
  21629. * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
  21630. * res = sat.q31(dop - (aop s* bop)[63:32]);
  21631. * }
  21632. * Rd = concat(rest, resb);
  21633. * x=0
  21634. * ~~~
  21635. *
  21636. * \param [in] t unsigned long long type of value stored in t
  21637. * \param [in] a unsigned long long type of value stored in a
  21638. * \param [in] b unsigned long long type of value stored in b
  21639. * \return value stored in unsigned long long type
  21640. */
  21641. __STATIC_FORCEINLINE unsigned long long __RV_DKMMSB(unsigned long long t, unsigned long long a, unsigned long long b)
  21642. {
  21643. __ASM volatile("dkmmsb %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  21644. return t;
  21645. }
  21646. /* ===== Inline Function End for DKMMSB ===== */
  21647. /* ===== Inline Function Start for DKMMSB.u ===== */
  21648. /**
  21649. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  21650. * \brief DKMMSB.u (64-bit MSW 32x32 Unsigned Multiply and Saturating Sub)
  21651. * \details
  21652. * **Type**: SIMD
  21653. *
  21654. * **Syntax**:\n
  21655. * ~~~
  21656. * DKMMSB.u Rd, Rs1, Rs2
  21657. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  21658. * ~~~
  21659. *
  21660. * **Purpose**:\n
  21661. * Do MSW 32x32 element unsigned multiplications and saturating subtraction simultaneously. The results are written into Rd.
  21662. *
  21663. * **Description**:\n
  21664. * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2
  21665. * and subtracts the most significant 32-bit multiplication results from the signed 32-bit elements of
  21666. * Rd. If the subtraction result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the
  21667. * range and the OV bit is set to 1. The results after saturation are written to Rd. The .u form of the
  21668. * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by
  21669. * adding a 1 to bit 31 of the results.
  21670. *
  21671. * **Operations**:\n
  21672. * ~~~
  21673. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
  21674. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
  21675. * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
  21676. * res = sat.q31(dop - (aop u* bop)[63:32]);
  21677. * }
  21678. * Rd = concat(rest, resb);
  21679. * x=0
  21680. * ~~~
  21681. *
  21682. * \param [in] t unsigned long long type of value stored in t
  21683. * \param [in] a unsigned long long type of value stored in a
  21684. * \param [in] b unsigned long long type of value stored in b
  21685. * \return value stored in unsigned long long type
  21686. */
  21687. __STATIC_FORCEINLINE unsigned long long __RV_DKMMSB_U(unsigned long long t, unsigned long long a, unsigned long long b)
  21688. {
  21689. __ASM volatile("dkmmsb.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  21690. return t;
  21691. }
  21692. /* ===== Inline Function End for DKMMSB.u ===== */
  21693. /* ===== Inline Function Start for DKMADA ===== */
  21694. /**
  21695. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  21696. * \brief DKMADA (Saturating Signed Multiply Two Halfs and Two Adds)
  21697. * \details
  21698. * **Type**: DSP
  21699. *
  21700. * **Syntax**:\n
  21701. * ~~~
  21702. * DKMADA Rd, Rs1, Rs2
  21703. * ~~~
  21704. *
  21705. * **Purpose**:\n
  21706. * Do two 16x16 with 32-bit signed double addition simultaneously. The results are written into Rd.
  21707. *
  21708. * **Description**:\n
  21709. * It multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
  21710. * the bottom 16-bit content of 32-bit elements in Rs2 and then adds the result to the result of
  21711. * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit
  21712. * elements in Rs2.
  21713. *
  21714. * **Operations**:\n
  21715. * ~~~
  21716. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
  21717. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
  21718. *
  21719. * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
  21720. * mul1 = aop.H[1] s* bop.H[1];
  21721. * mul2 = aop.H[0] s* bop.H[0];
  21722. * res = sat.q31(dop + mul1 + mul2);
  21723. * }
  21724. * Rd = concat(rest, resb);
  21725. * x=0
  21726. * ~~~
  21727. *
  21728. * \param [in] t unsigned long long type of value stored in t
  21729. * \param [in] a unsigned long long type of value stored in a
  21730. * \param [in] b unsigned long long type of value stored in b
  21731. * \return value stored in unsigned long long type
  21732. */
  21733. __STATIC_FORCEINLINE unsigned long long __RV_DKMADA(unsigned long long t, unsigned long long a, unsigned long long b)
  21734. {
  21735. __ASM volatile("dkmada %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  21736. return t;
  21737. }
  21738. /* ===== Inline Function End for DKMADA ===== */
  21739. /* ===== Inline Function Start for DKMAXDA ===== */
  21740. /**
  21741. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  21742. * \brief DKMAXDA (Two Cross 16x16 with 32-bit Signed Double Add)
  21743. * \details
  21744. * **Type**: DSP
  21745. *
  21746. * **Syntax**:\n
  21747. * ~~~
  21748. * DKMAXDA Rd, Rs1, Rs2
  21749. * ~~~
  21750. *
  21751. * **Purpose**:\n
  21752. * Do two cross 16x16 with 32-bit signed double addition simultaneously. The results are written into Rd.
  21753. *
  21754. * **Description**:\n
  21755. * It multiplies the top 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-bit
  21756. * elements in Rs2 and then adds the result to the result of multiplying the bottom 16-bit content of
  21757. * 32-bit elements in Rs1 with the top 16-bit content of 32-bit elements in elements in Rs2.
  21758. *
  21759. * **Operations**:\n
  21760. * ~~~
  21761. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
  21762. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
  21763. *
  21764. * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
  21765. * mul1 = aop.H[1] s* bop.H[0];
  21766. * mul2 = aop.H[0] s* bop.H[1];
  21767. * res = sat.q31(dop + mul1 + mul2);
  21768. * }
  21769. * Rd = concat(rest, resb);
  21770. * x=0
  21771. * ~~~
  21772. *
  21773. * \param [in] t unsigned long long type of value stored in t
  21774. * \param [in] a unsigned long long type of value stored in a
  21775. * \param [in] b unsigned long long type of value stored in b
  21776. * \return value stored in unsigned long long type
  21777. */
  21778. __STATIC_FORCEINLINE unsigned long long __RV_DKMAXDA(unsigned long long t, unsigned long long a, unsigned long long b)
  21779. {
  21780. __ASM volatile("dkmaxda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  21781. return t;
  21782. }
  21783. /* ===== Inline Function End for DKMAXDA ===== */
  21784. /* ===== Inline Function Start for DKMADS ===== */
  21785. /**
  21786. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  21787. * \brief DKMADS (Two 16x16 with 32-bit Signed Add and Sub)
  21788. * \details
  21789. * **Type**: DSP
  21790. *
  21791. * **Syntax**:\n
  21792. * ~~~
  21793. * DKMADS Rd, Rs1, Rs2
  21794. * ~~~
  21795. *
  21796. * **Purpose**:\n
  21797. * Do two 16x16 with 32-bit signed addition and subtraction simultaneously. The results are written into Rd.
  21798. *
  21799. * **Description**:\n
  21800. * It multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
  21801. * the bottom 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
  21802. * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit
  21803. * elements in Rs2.
  21804. *
  21805. * **Operations**:\n
  21806. * ~~~
  21807. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
  21808. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
  21809. *
  21810. * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
  21811. * mul1 = aop.H[1] s* bop.H[1];
  21812. * mul2 = aop.H[0] s* bop.H[0];
  21813. * res = sat.q31(dop + mul1 - mul2);
  21814. * }
  21815. * Rd = concat(rest, resb);
  21816. * x=0
  21817. * ~~~
  21818. *
  21819. * \param [in] t unsigned long long type of value stored in t
  21820. * \param [in] a unsigned long long type of value stored in a
  21821. * \param [in] b unsigned long long type of value stored in b
  21822. * \return value stored in unsigned long long type
  21823. */
  21824. __STATIC_FORCEINLINE unsigned long long __RV_DKMADS(unsigned long long t, unsigned long long a, unsigned long long b)
  21825. {
  21826. __ASM volatile("dkmads %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  21827. return t;
  21828. }
  21829. /* ===== Inline Function End for DKMADS ===== */
  21830. /* ===== Inline Function Start for DKMADRS ===== */
  21831. /**
  21832. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  21833. * \brief DKMADRS (Two 16x16 with 32-bit Signed Add and Reversed Sub)
  21834. * \details
  21835. * **Type**: DSP
  21836. *
  21837. * **Syntax**:\n
  21838. * ~~~
  21839. * DKMADRS Rd, Rs1, Rs2
  21840. * ~~~
  21841. *
  21842. * **Purpose**:\n
  21843. * Do two 16x16 with 32-bit signed addition and revered subtraction simultaneously. The results are written into Rd.
  21844. *
  21845. * **Description**:\n
  21846. * it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
  21847. * top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
  21848. * multiplying the bottom 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-
  21849. * bit elements in Rs2
  21850. *
  21851. * **Operations**:\n
  21852. * ~~~
  21853. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
  21854. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
  21855. *
  21856. * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
  21857. * mul1 = aop.H[1] s* bop.H[1];
  21858. * mul2 = aop.H[0] s* bop.H[0];
  21859. * res = sat.q31(dop - mul1 + mul2);
  21860. * }
  21861. * Rd = concat(rest, resb);
  21862. * x=0
  21863. * ~~~
  21864. *
  21865. * \param [in] t unsigned long long type of value stored in t
  21866. * \param [in] a unsigned long long type of value stored in a
  21867. * \param [in] b unsigned long long type of value stored in b
  21868. * \return value stored in unsigned long long type
  21869. */
  21870. __STATIC_FORCEINLINE unsigned long long __RV_DKMADRS(unsigned long long t, unsigned long long a, unsigned long long b)
  21871. {
  21872. __ASM volatile("dkmadrs %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  21873. return t;
  21874. }
  21875. /* ===== Inline Function End for DKMADRS ===== */
  21876. /* ===== Inline Function Start for DKMAXDS ===== */
  21877. /**
  21878. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  21879. * \brief DKMAXDS (Saturating Signed Crossed Multiply Two Halfs & Subtract & Add)
  21880. * \details
  21881. * **Type**: DSP
  21882. *
  21883. * **Syntax**:\n
  21884. * ~~~
  21885. * DKMAXDS Rd, Rs1, Rs2
  21886. * ~~~
  21887. *
  21888. * **Purpose**:\n
  21889. * Do two cross 16x16 with 32-bit signed addition and subtraction simultaneously. The results are written into Rd.
  21890. *
  21891. * **Description**:\n
  21892. * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then
  21893. * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to
  21894. * the corresponding 32-bit elements in a third register. The addition result may be saturated.
  21895. *
  21896. * **Operations**:\n
  21897. * ~~~
  21898. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
  21899. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
  21900. *
  21901. * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
  21902. * mul1 = aop.H[1] s* bop.H[0];
  21903. * mul2 = aop.H[0] s* bop.H[1];
  21904. * res = sat.q31(dop + mul1 - mul2);
  21905. * }
  21906. * Rd = concat(rest, resb);
  21907. * x=0
  21908. * ~~~
  21909. *
  21910. * \param [in] t unsigned long long type of value stored in t
  21911. * \param [in] a unsigned long long type of value stored in a
  21912. * \param [in] b unsigned long long type of value stored in b
  21913. * \return value stored in unsigned long long type
  21914. */
  21915. __STATIC_FORCEINLINE unsigned long long __RV_DKMAXDS(unsigned long long t, unsigned long long a, unsigned long long b)
  21916. {
  21917. __ASM volatile("dkmaxds %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  21918. return t;
  21919. }
  21920. /* ===== Inline Function End for DKMAXDS ===== */
  21921. /* ===== Inline Function Start for DKMSDA ===== */
  21922. /**
  21923. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  21924. * \brief DKMSDA (Two 16x16 with 32-bit Signed Double Sub)
  21925. * \details
  21926. * **Type**: DSP
  21927. *
  21928. * **Syntax**:\n
  21929. * ~~~
  21930. * DKMSDA Rd, Rs1, Rs2
  21931. * ~~~
  21932. *
  21933. * **Purpose**:\n
  21934. * Do two 16x16 with 32-bit signed double subtraction simultaneously. The results are written into Rd.
  21935. *
  21936. * **Description**:\n
  21937. * it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  21938. * with the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of
  21939. * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2.
  21940. *
  21941. * **Operations**:\n
  21942. * ~~~
  21943. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
  21944. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
  21945. *
  21946. * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
  21947. * mul1 = aop.H[1] s* bop.H[0];
  21948. * mul2 = aop.H[0] s* bop.H[1];
  21949. * res = sat.q31(dop - mul1 - mul2);
  21950. * }
  21951. * Rd = concat(rest, resb);
  21952. * x=0
  21953. * ~~~
  21954. *
  21955. * \param [in] t unsigned long long type of value stored in t
  21956. * \param [in] a unsigned long long type of value stored in a
  21957. * \param [in] b unsigned long long type of value stored in b
  21958. * \return value stored in unsigned long long type
  21959. */
  21960. __STATIC_FORCEINLINE unsigned long long __RV_DKMSDA(unsigned long long t, unsigned long long a, unsigned long long b)
  21961. {
  21962. __ASM volatile("dkmsda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  21963. return t;
  21964. }
  21965. /* ===== Inline Function End for DKMSDA ===== */
  21966. /* ===== Inline Function Start for DKMSXDA ===== */
  21967. /**
  21968. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  21969. * \brief DKMSXDA (Two Cross 16x16 with 32-bit Signed Double Sub)
  21970. * \details
  21971. * **Type**: DSP
  21972. *
  21973. * **Syntax**:\n
  21974. * ~~~
  21975. * DKMSXDA Rd, Rs1, Rs2
  21976. * ~~~
  21977. *
  21978. * **Purpose**:\n
  21979. * Do two cross 16x16 with 32-bit signed double subtraction simultaneously. The results are written into Rd.
  21980. *
  21981. * **Description**:\n
  21982. * It multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  21983. * with the top 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of the
  21984. * 32-bit elements of Rs1 with the bottom 16-bit content of the 32-bit elements of Rs2.
  21985. *
  21986. * **Operations**:\n
  21987. * ~~~
  21988. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
  21989. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
  21990. *
  21991. * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
  21992. * mul1 = aop.H[1] s* bop.H[0];
  21993. * mul2 = aop.H[0] s* bop.H[1];
  21994. * res = sat.q31(dop - mul1 - mul2);
  21995. * }
  21996. * Rd = concat(rest, resb);
  21997. * x=0
  21998. * ~~~
  21999. *
  22000. * \param [in] t unsigned long long type of value stored in t
  22001. * \param [in] a unsigned long long type of value stored in a
  22002. * \param [in] b unsigned long long type of value stored in b
  22003. * \return value stored in unsigned long long type
  22004. */
  22005. __STATIC_FORCEINLINE unsigned long long __RV_DKMSXDA(unsigned long long t, unsigned long long a, unsigned long long b)
  22006. {
  22007. __ASM volatile("dkmsxda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  22008. return t;
  22009. }
  22010. /* ===== Inline Function End for DKMSXDA ===== */
  22011. /* ===== Inline Function Start for DSMAQA ===== */
  22012. /**
  22013. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22014. * \brief DSMAQA (Four Signed 8x8 with 32-bit Signed Add)
  22015. * \details
  22016. * **Type**: DSP
  22017. *
  22018. * **Syntax**:\n
  22019. * ~~~
  22020. * DSMAQA Rd, Rs1, Rs2
  22021. * ~~~
  22022. *
  22023. * **Purpose**:\n
  22024. * Do four signed 8x8 multiplications with 32-bit signed addition in each 32-bit chunk simultaneously. The results are written into Rd.
  22025. *
  22026. * **Description**:\n
  22027. * This instruction multiplies the four signed 8-bit elements of 32-bit chunks of Rs1 with the four
  22028. * signed 8-bit elements of 32-bit chunks of Rs2 and then adds the four results together with the signed
  22029. * content of the corresponding 32-bit chunks of Rd. The final results are written back to the
  22030. * corresponding 32-bit chunks in Rd.
  22031. *
  22032. * **Operations**:\n
  22033. * ~~~
  22034. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
  22035. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
  22036. *
  22037. * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
  22038. * m0 = aop.B[0] s* bop.B[0];
  22039. * m1 = aop.B[1] s* bop.B[1];
  22040. * m2 = aop.B[2] s* bop.B[2];
  22041. * m3 = aop.B[3] s* bop.B[3];
  22042. * res = dop + m0 + m1 + m2 + m3;
  22043. * }
  22044. * Rd = concat(rest, resb);
  22045. * x=0
  22046. * ~~~
  22047. *
  22048. * \param [in] t unsigned long long type of value stored in t; accumulator passed in Rd (two packed 32-bit words)
  22049. * \param [in] a unsigned long long type of value stored in a; first source operand (Rs1)
  22050. * \param [in] b unsigned long long type of value stored in b; second source operand (Rs2)
  22051. * \return value stored in unsigned long long type; the updated content of Rd
  22052. */
  22053. __STATIC_FORCEINLINE unsigned long long __RV_DSMAQA(unsigned long long t, unsigned long long a, unsigned long long b)
  22054. {
  22055. __ASM volatile("dsmaqa %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); /* "+r": t is both the Rd input (accumulator) and the result */
  22056. return t;
  22057. }
  22058. /* ===== Inline Function End for DSMAQA ===== */
  22059. /* ===== Inline Function Start for DSMAQA.SU ===== */
  22060. /**
  22061. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22062. * \brief DSMAQA.SU (Four Signed 8 x Unsigned 8 with 32-bit Signed Add)
  22063. * \details
  22064. * **Type**: DSP
  22065. *
  22066. * **Syntax**:\n
  22067. * ~~~
  22068. * DSMAQA.SU Rd, Rs1, Rs2
  22069. * ~~~
  22070. *
  22071. * **Purpose**:\n
  22072. * Do four signed 8 x unsigned 8 multiplications with 32-bit signed addition in each 32-bit chunk simultaneously. The results are written into Rd.
  22073. *
  22074. * **Description**:\n
  22075. * This instruction multiplies the four signed 8-bit elements of 32-bit chunks of Rs1 with the four
  22076. * unsigned 8-bit elements of 32-bit chunks of Rs2 and then adds the four results together with the signed
  22077. * content of the corresponding 32-bit chunks of Rd. The final results are written back to the
  22078. * corresponding 32-bit chunks in Rd.
  22079. *
  22080. * **Operations**:\n
  22081. * ~~~
  22082. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
  22083. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
  22084. *
  22085. * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
  22086. * m0 = aop.B[0] su* bop.B[0];
  22087. * m1 = aop.B[1] su* bop.B[1];
  22088. * m2 = aop.B[2] su* bop.B[2];
  22089. * m3 = aop.B[3] su* bop.B[3];
  22090. * res = dop + m0 + m1 + m2 + m3;
  22091. * }
  22092. * Rd = concat(rest, resb);
  22093. * x=0
  22094. * ~~~
  22095. *
  22096. * \param [in] t unsigned long long type of value stored in t; accumulator passed in Rd (two packed 32-bit words)
  22097. * \param [in] a unsigned long long type of value stored in a; first source operand (Rs1, signed 8-bit elements)
  22098. * \param [in] b unsigned long long type of value stored in b; second source operand (Rs2, unsigned 8-bit elements)
  22099. * \return value stored in unsigned long long type; the updated content of Rd
  22100. */
  22101. __STATIC_FORCEINLINE unsigned long long __RV_DSMAQA_SU(unsigned long long t, unsigned long long a, unsigned long long b)
  22102. {
  22103. __ASM volatile("dsmaqa.su %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); /* "+r": t is both the Rd input (accumulator) and the result */
  22104. return t;
  22105. }
  22106. /* ===== Inline Function End for DSMAQA.SU ===== */
  22107. /* ===== Inline Function Start for DUMAQA ===== */
  22108. /**
  22109. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22110. * \brief DUMAQA (Four Unsigned 8x8 with 32-bit Unsigned Add)
  22111. * \details
  22112. * **Type**: DSP
  22113. *
  22114. * **Syntax**:\n
  22115. * ~~~
  22116. * DUMAQA Rd, Rs1, Rs2
  22117. * ~~~
  22118. *
  22119. * **Purpose**:\n
  22120. * Do four unsigned 8x8 multiplications with 32-bit unsigned addition in each 32-bit chunk simultaneously. The results are written into Rd.
  22121. *
  22122. * **Description**:\n
  22123. * This instruction multiplies the four unsigned 8-bit elements of 32-bit chunks of Rs1 with the four
  22124. * unsigned 8-bit elements of 32-bit chunks of Rs2 and then adds the four results together with the
  22125. * unsigned content of the corresponding 32-bit chunks of Rd. The final results are written back to the
  22126. * corresponding 32-bit chunks in Rd.
  22127. *
  22128. * **Operations**:\n
  22129. * ~~~
  22130. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
  22131. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
  22132. *
  22133. * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
  22134. * m0 = aop.B[0] u* bop.B[0];
  22135. * m1 = aop.B[1] u* bop.B[1];
  22136. * m2 = aop.B[2] u* bop.B[2];
  22137. * m3 = aop.B[3] u* bop.B[3];
  22138. * res = dop + m0 + m1 + m2 + m3;
  22139. * }
  22140. * Rd = concat(rest, resb);
  22141. * x=0
  22142. * ~~~
  22143. *
  22144. * \param [in] t unsigned long long type of value stored in t; accumulator passed in Rd (two packed 32-bit words)
  22145. * \param [in] a unsigned long long type of value stored in a; first source operand (Rs1)
  22146. * \param [in] b unsigned long long type of value stored in b; second source operand (Rs2)
  22147. * \return value stored in unsigned long long type; the updated content of Rd
  22148. */
  22149. __STATIC_FORCEINLINE unsigned long long __RV_DUMAQA(unsigned long long t, unsigned long long a, unsigned long long b)
  22150. {
  22151. __ASM volatile("dumaqa %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); /* "+r": t is both the Rd input (accumulator) and the result */
  22152. return t;
  22153. }
  22154. /* ===== Inline Function End for DUMAQA ===== */
  22155. /* ===== Inline Function Start for DKMDA32 ===== */
  22156. /**
  22157. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22158. * \brief DKMDA32 (Two Signed 32x32 with 64-bit Saturation Add)
  22159. * \details
  22160. * **Type**: DSP
  22161. *
  22162. * **Syntax**:\n
  22163. * ~~~
  22164. * DKMDA32 Rd, Rs1, Rs2
  22165. * ~~~
  22166. *
  22167. * **Purpose**:\n
  22168. * Do two signed 32x32 multiplications and add the signed multiplication results with Q63 saturation. The results are written into Rd.
  22169. *
  22170. * **Description**:\n
  22171. * This instruction multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
  22172. * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1
  22173. * with the top 32-bit element of Rs2. The sum is saturated to the Q63 range and written into Rd.
  22174. *
  22175. * **Operations**:\n
  22176. * ~~~
  22177. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  22178. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  22179. * t0 = op1b s* op2b;
  22180. * t1 = op1t s* op2t;
  22181. * Rd = sat.q63(t0 + t1);
  22182. * x=0
  22183. * ~~~
  22184. *
  22185. * \param [in] a unsigned long long type of value stored in a; first source operand (Rs1, two packed 32-bit words)
  22186. * \param [in] b unsigned long long type of value stored in b; second source operand (Rs2, two packed 32-bit words)
  22187. * \return value stored in long long type; the content written to Rd
  22188. */
  22189. __STATIC_FORCEINLINE long long __RV_DKMDA32(unsigned long long a, unsigned long long b)
  22190. {
  22191. long long result;
  22192. __ASM volatile("dkmda32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); /* "=r": result is an output-only operand bound to Rd */
  22193. return result;
  22194. }
  22195. /* ===== Inline Function End for DKMDA32 ===== */
  22196. /* ===== Inline Function Start for DKMXDA32 ===== */
  22197. /**
  22198. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22199. * \brief DKMXDA32 (Two Cross Signed 32x32 with 64-bit Saturation Add)
  22200. * \details
  22201. * **Type**: DSP
  22202. *
  22203. * **Syntax**:\n
  22204. * ~~~
  22205. * DKMXDA32 Rd, Rs1, Rs2
  22206. * ~~~
  22207. *
  22208. * **Purpose**:\n
  22209. * Do two cross signed 32x32 multiplications and add the signed multiplication results with Q63 saturation. The results are written into Rd.
  22210. *
  22211. * **Description**:\n
  22212. * It multiplies the bottom 32-bit element of Rs1 with the top 32-bit
  22213. * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1
  22214. * with the bottom 32-bit element of Rs2. The sum is saturated to the Q63 range and written into Rd.
  22215. *
  22216. * **Operations**:\n
  22217. * ~~~
  22218. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  22219. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  22220. * t01 = op1b s* op2t;
  22221. * t10 = op1t s* op2b;
  22222. * Rd = sat.q63(t01 + t10);
  22223. * x=0
  22224. * ~~~
  22225. *
  22226. * \param [in] a unsigned long long type of value stored in a; first source operand (Rs1, two packed 32-bit words)
  22227. * \param [in] b unsigned long long type of value stored in b; second source operand (Rs2, two packed 32-bit words)
  22228. * \return value stored in long long type; the content written to Rd
  22229. */
  22230. __STATIC_FORCEINLINE long long __RV_DKMXDA32(unsigned long long a, unsigned long long b)
  22231. {
  22232. long long result;
  22233. __ASM volatile("dkmxda32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); /* "=r": result is an output-only operand bound to Rd */
  22234. return result;
  22235. }
  22236. /* ===== Inline Function End for DKMXDA32 ===== */
  22237. /* ===== Inline Function Start for DKMADA32 ===== */
  22238. /**
  22239. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22240. * \brief DKMADA32 (Two Signed 32x32 with 64-bit Saturation Add)
  22241. * \details
  22242. * **Type**: DSP
  22243. *
  22244. * **Syntax**:\n
  22245. * ~~~
  22246. * DKMADA32 Rd, Rs1, Rs2
  22247. * ~~~
  22248. *
  22249. * **Purpose**:\n
  22250. * Do two signed 32x32 multiplications and add the signed multiplication results and a third register with Q63 saturation. The results are written into Rd.
  22251. *
  22252. * **Description**:\n
  22253. * It multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
  22254. * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1
  22255. * with the top 32-bit element of Rs2. The sum is added to the 64-bit content of Rd with Q63 saturation.
  22256. *
  22257. * **Operations**:\n
  22258. * ~~~
  22259. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  22260. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  22261. * t0 = op1b s* op2b;
  22262. * t1 = op1t s* op2t;
  22263. * Rd = sat.q63(Rd + t0 + t1);
  22264. * x=0
  22265. * ~~~
  22266. *
  22267. * \param [in] t long long type of value stored in t; 64-bit accumulator passed in Rd
  22268. * \param [in] a unsigned long long type of value stored in a; first source operand (Rs1, two packed 32-bit words)
  22269. * \param [in] b unsigned long long type of value stored in b; second source operand (Rs2, two packed 32-bit words)
  22270. * \return value stored in long long type; the updated content of Rd
  22271. */
  22272. __STATIC_FORCEINLINE long long __RV_DKMADA32(long long t, unsigned long long a, unsigned long long b)
  22273. {
  22274. __ASM volatile("dkmada32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); /* "+r": t is both the Rd input (accumulator) and the result */
  22275. return t;
  22276. }
  22277. /* ===== Inline Function End for DKMADA32 ===== */
  22278. /* ===== Inline Function Start for DKMAXDA32 ===== */
  22279. /**
  22280. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22281. * \brief DKMAXDA32 (Two Cross Signed 32x32 with 64-bit Saturation Add)
  22282. * \details
  22283. * **Type**: DSP
  22284. *
  22285. * **Syntax**:\n
  22286. * ~~~
  22287. * DKMAXDA32 Rd, Rs1, Rs2
  22288. * ~~~
  22289. *
  22290. * **Purpose**:\n
  22291. * Do two cross signed 32x32 multiplications and add the signed multiplication results and a third register with Q63 saturation. The
  22292. * results are written into Rd.
  22293. *
  22294. * **Description**:\n
  22295. * It multiplies the top 32-bit element in Rs1 with the bottom 32-bit
  22296. * element in Rs2 and then adds the result to the result of multiplying the bottom 32-bit element in Rs1
  22297. * with the top 32-bit element in Rs2. The sum is added to the 64-bit content of Rd with Q63 saturation.
  22298. *
  22299. * **Operations**:\n
  22300. * ~~~
  22301. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  22302. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  22303. * t01 = op1b s* op2t;
  22304. * t10 = op1t s* op2b;
  22305. * Rd = sat.q63(Rd + t01 + t10);
  22306. * x=0
  22307. * ~~~
  22308. *
  22309. * \param [in] t long long type of value stored in t; 64-bit accumulator passed in Rd
  22310. * \param [in] a unsigned long long type of value stored in a; first source operand (Rs1, two packed 32-bit words)
  22311. * \param [in] b unsigned long long type of value stored in b; second source operand (Rs2, two packed 32-bit words)
  22312. * \return value stored in long long type; the updated content of Rd
  22313. */
  22314. __STATIC_FORCEINLINE long long __RV_DKMAXDA32(long long t, unsigned long long a, unsigned long long b)
  22315. {
  22316. __ASM volatile("dkmaxda32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); /* "+r": t is both the Rd input (accumulator) and the result */
  22317. return t;
  22318. }
  22319. /* ===== Inline Function End for DKMAXDA32 ===== */
  22320. /* ===== Inline Function Start for DKMADS32 ===== */
  22321. /**
  22322. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22323. * \brief DKMADS32 (Two Signed 32x32 with 64-bit Saturation Add and Sub)
  22324. * \details
  22325. * **Type**: DSP
  22326. *
  22327. * **Syntax**:\n
  22328. * ~~~
  22329. * DKMADS32 Rd, Rs1, Rs2
  22330. * ~~~
  22331. *
  22332. * **Purpose**:\n
  22333. * Do two signed 32x32 multiplications, add the top signed multiplication result, subtract the bottom signed multiplication result
  22334. * and add a third register with Q63 saturation. The results are written into Rd.
  22335. *
  22336. * **Description**:\n
  22337. * It multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit
  22338. * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in Rs1
  22339. * with the top 32-bit element in Rs2. The difference is added to the 64-bit content of Rd with Q63 saturation.
  22340. *
  22341. * **Operations**:\n
  22342. * ~~~
  22343. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  22344. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  22345. *
  22346. * t0 = op1b s* op2b;
  22347. * t1 = op1t s* op2t;
  22348. * Rd = sat.q63(Rd - t0 + t1);
  22349. * x=0
  22350. * ~~~
  22351. *
  22352. * \param [in] t long long type of value stored in t; 64-bit accumulator passed in Rd
  22353. * \param [in] a unsigned long long type of value stored in a; first source operand (Rs1, two packed 32-bit words)
  22354. * \param [in] b unsigned long long type of value stored in b; second source operand (Rs2, two packed 32-bit words)
  22355. * \return value stored in long long type; the updated content of Rd
  22356. */
  22357. __STATIC_FORCEINLINE long long __RV_DKMADS32(long long t, unsigned long long a, unsigned long long b)
  22358. {
  22359. __ASM volatile("dkmads32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); /* "+r": t is both the Rd input (accumulator) and the result */
  22360. return t;
  22361. }
  22362. /* ===== Inline Function End for DKMADS32 ===== */
  22363. /* ===== Inline Function Start for DKMADRS32 ===== */
  22364. /**
  22365. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22366. * \brief DKMADRS32 (Two Signed 32x32 with 64-bit Saturation Reversed Add and Sub)
  22367. * \details
  22368. * **Type**: DSP
  22369. *
  22370. * **Syntax**:\n
  22371. * ~~~
  22372. * DKMADRS32 Rd, Rs1, Rs2
  22373. * ~~~
  22374. *
  22375. * **Purpose**:\n
  22376. * Do two signed 32x32 multiplications, subtract the top signed multiplication result from the bottom signed
  22377. * multiplication result and add a third register with Q63 saturation. The results are written into
  22378. * Rd.
  22379. *
  22380. * **Description**:\n
  22381. * It multiplies the top 32-bit element in Rs1 with the top 32-bit
  22382. * element in Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit
  22383. * element in Rs1 with the bottom 32-bit element in Rs2. The difference is added to the 64-bit content of Rd with Q63 saturation.
  22384. *
  22385. * **Operations**:\n
  22386. * ~~~
  22387. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  22388. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  22389. * t0 = op1b s* op2b;
  22390. * t1 = op1t s* op2t;
  22391. * Rd = sat.q63(Rd + t0 - t1);
  22392. * x=0
  22393. * ~~~
  22394. *
  22395. * \param [in] t long long type of value stored in t; 64-bit accumulator passed in Rd
  22396. * \param [in] a unsigned long long type of value stored in a; first source operand (Rs1, two packed 32-bit words)
  22397. * \param [in] b unsigned long long type of value stored in b; second source operand (Rs2, two packed 32-bit words)
  22398. * \return value stored in long long type; the updated content of Rd
  22399. */
  22400. __STATIC_FORCEINLINE long long __RV_DKMADRS32(long long t, unsigned long long a, unsigned long long b)
  22401. {
  22402. __ASM volatile("dkmadrs32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); /* "+r": t is both the Rd input (accumulator) and the result */
  22403. return t;
  22404. }
  22405. /* ===== Inline Function End for DKMADRS32 ===== */
  22406. /* ===== Inline Function Start for DKMAXDS32 ===== */
  22407. /**
  22408. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22409. * \brief DKMAXDS32 (Two Cross Signed 32x32 with 64-bit Saturation Add and Sub)
  22410. * \details
  22411. * **Type**: DSP
  22412. *
  22413. * **Syntax**:\n
  22414. * ~~~
  22415. * DKMAXDS32 Rd, Rs1, Rs2
  22416. * ~~~
  22417. *
  22418. * **Purpose**:\n
  22419. * Do two cross signed 32x32 multiplications, add the (top x bottom) signed multiplication result, subtract the (bottom x top) signed multiplication result
  22420. * and add a third register with Q63 saturation. The results are written into Rd.
  22421. *
  22422. * **Description**:\n
  22423. * It multiplies the bottom 32-bit element in Rs1 with the top 32-bit
  22424. * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in
  22425. * Rs1 with the bottom 32-bit element in Rs2. The difference is added to the 64-bit content of Rd with Q63 saturation.
  22426. *
  22427. * **Operations**:\n
  22428. * ~~~
  22429. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  22430. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  22431. *
  22432. * t01 = op1b s* op2t;
  22433. * t10 = op1t s* op2b;
  22434. * Rd = sat.q63(Rd - t01 + t10);
  22435. * x=0
  22436. * ~~~
  22437. *
  22438. * \param [in] t long long type of value stored in t; 64-bit accumulator passed in Rd
  22439. * \param [in] a unsigned long long type of value stored in a; first source operand (Rs1, two packed 32-bit words)
  22440. * \param [in] b unsigned long long type of value stored in b; second source operand (Rs2, two packed 32-bit words)
  22441. * \return value stored in long long type; the updated content of Rd
  22442. */
  22443. __STATIC_FORCEINLINE long long __RV_DKMAXDS32(long long t, unsigned long long a, unsigned long long b)
  22444. {
  22445. __ASM volatile("dkmaxds32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); /* "+r": t is both the Rd input (accumulator) and the result */
  22446. return t;
  22447. }
  22448. /* ===== Inline Function End for DKMAXDS32 ===== */
  22449. /* ===== Inline Function Start for DKMSDA32 ===== */
  22450. /**
  22451. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22452. * \brief DKMSDA32 (Two Signed 32x32 with 64-bit Saturation Sub)
  22453. * \details
  22454. * **Type**: DSP
  22455. *
  22456. * **Syntax**:\n
  22457. * ~~~
  22458. * DKMSDA32 Rd, Rs1, Rs2
  22459. * ~~~
  22460. *
  22461. * **Purpose**:\n
  22462. * Do two signed 32x32 multiplications and subtract both the top and the bottom signed multiplication
  22463. * results from a third register with Q63 saturation. The results are written into Rd.
  22464. *
  22465. * **Description**:\n
  22466. * It multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
  22467. * element of Rs2 and multiplies the top 32-bit element of Rs1 with the top 32-bit element of Rs2. Both products are subtracted from the 64-bit content of Rd with Q63 saturation.
  22468. *
  22469. * **Operations**:\n
  22470. * ~~~
  22471. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  22472. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  22473. *
  22474. * t0 = op1b s* op2b;
  22475. * t1 = op1t s* op2t;
  22476. * Rd = sat.q63(Rd - t0 - t1);
  22477. * x=0
  22478. * ~~~
  22479. *
  22480. * \param [in] t long long type of value stored in t; 64-bit accumulator passed in Rd
  22481. * \param [in] a unsigned long long type of value stored in a; first source operand (Rs1, two packed 32-bit words)
  22482. * \param [in] b unsigned long long type of value stored in b; second source operand (Rs2, two packed 32-bit words)
  22483. * \return value stored in long long type; the updated content of Rd
  22484. */
  22485. __STATIC_FORCEINLINE long long __RV_DKMSDA32(long long t, unsigned long long a, unsigned long long b)
  22486. {
  22487. __ASM volatile("dkmsda32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); /* "+r": t is both the Rd input (accumulator) and the result */
  22488. return t;
  22489. }
  22490. /* ===== Inline Function End for DKMSDA32 ===== */
  22491. /* ===== Inline Function Start for DKMSXDA32 ===== */
  22492. /**
  22493. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22494. * \brief DKMSXDA32 (Two Cross Signed 32x32 with 64-bit Saturation Sub)
  22495. * \details
  22496. * **Type**: DSP
  22497. *
  22498. * **Syntax**:\n
  22499. * ~~~
  22500. * DKMSXDA32 Rd, Rs1, Rs2
  22501. * ~~~
  22502. *
  22503. * **Purpose**:\n
  22504. * Do two cross signed 32x32 multiplications and subtract both signed multiplication
  22505. * results from a third register with Q63 saturation. The results are written into Rd.
  22506. *
  22507. * **Description**:\n
  22508. * It multiplies the bottom 32-bit element of Rs1 with the top 32-bit
  22509. * element of Rs2 and multiplies the top 32-bit element of Rs1 with the bottom 32-bit element of Rs2. Both products are subtracted from the 64-bit content of Rd with Q63 saturation.
  22510. *
  22511. * **Operations**:\n
  22512. * ~~~
  22513. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  22514. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  22515. *
  22516. * t0 = op1b s* op2t;
  22517. * t1 = op1t s* op2b;
  22518. * Rd = sat.q63(Rd - t0 - t1);
  22519. * x=0
  22520. * ~~~
  22521. *
  22522. * \param [in] t long long type of value stored in t; 64-bit accumulator passed in Rd
  22523. * \param [in] a unsigned long long type of value stored in a; first source operand (Rs1, two packed 32-bit words)
  22524. * \param [in] b unsigned long long type of value stored in b; second source operand (Rs2, two packed 32-bit words)
  22525. * \return value stored in long long type; the updated content of Rd
  22526. */
  22527. __STATIC_FORCEINLINE long long __RV_DKMSXDA32(long long t, unsigned long long a, unsigned long long b)
  22528. {
  22529. __ASM volatile("dkmsxda32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); /* "+r": t is both the Rd input (accumulator) and the result */
  22530. return t;
  22531. }
  22532. /* ===== Inline Function End for DKMSXDA32 ===== */
  22533. /* ===== Inline Function Start for DSMDS32 ===== */
  22534. /**
  22535. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22536. * \brief DSMDS32 (Two Signed 32x32 with 64-bit Sub)
  22537. * \details
  22538. * **Type**: DSP
  22539. *
  22540. * **Syntax**:\n
  22541. * ~~~
  22542. * DSMDS32 Rd, Rs1, Rs2
  22543. * ~~~
  22544. *
  22545. * **Purpose**:\n
  22546. * Do two signed 32x32 multiplications and subtract the bottom signed multiplication result from the top signed multiplication result. The
  22547. * results are written into Rd.
  22548. *
  22549. * **Description**:\n
  22550. * It multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
  22551. * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
  22552. * Rs1 with the top 32-bit element of Rs2.
  22553. *
  22554. * **Operations**:\n
  22555. * ~~~
  22556. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  22557. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  22558. *
  22559. * t0 = op1b s* op2b;
  22560. * t1 = op1t s* op2t;
  22561. * Rd = t1 - t0;
  22562. * x=0
  22563. * ~~~
  22564. *
  22565. * \param [in] a unsigned long long type of value stored in a; first source operand (Rs1, two packed 32-bit words)
  22566. * \param [in] b unsigned long long type of value stored in b; second source operand (Rs2, two packed 32-bit words)
  22567. * \return value stored in long long type; the content written to Rd
  22568. */
  22569. __STATIC_FORCEINLINE long long __RV_DSMDS32(unsigned long long a, unsigned long long b)
  22570. {
  22571. long long result;
  22572. __ASM volatile("dsmds32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); /* "=r": result is an output-only operand bound to Rd */
  22573. return result;
  22574. }
  22575. /* ===== Inline Function End for DSMDS32 ===== */
  22576. /* ===== Inline Function Start for DSMDRS32 ===== */
  22577. /**
  22578. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22579. * \brief DSMDRS32 (Two Signed 32x32 with 64-bit Reversed Sub)
  22580. * \details
  22581. * **Type**: DSP
  22582. *
  22583. * **Syntax**:\n
  22584. * ~~~
  22585. * DSMDRS32 Rd, Rs1, Rs2
  22586. * ~~~
  22587. *
  22588. * **Purpose**:\n
  22589. * Do two signed 32x32 multiplications and subtract the top signed multiplication result from the bottom signed multiplication result. The results are written into Rd.
  22590. *
  22591. * **Description**:\n
  22592. * It multiplies the top 32-bit element of Rs1 with the top 32-bit
  22593. * element of Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit
  22594. * element of Rs1 with the bottom 32-bit element of Rs2.
  22595. *
  22596. * **Operations**:\n
  22597. * ~~~
  22598. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  22599. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  22600. *
  22601. * t0 = op1b s* op2b;
  22602. * t1 = op1t s* op2t;
  22603. * Rd = t0 - t1;
  22604. * x=0
  22605. * ~~~
  22606. *
  22607. * \param [in] a unsigned long long type of value stored in a; first source operand (Rs1, two packed 32-bit words)
  22608. * \param [in] b unsigned long long type of value stored in b; second source operand (Rs2, two packed 32-bit words)
  22609. * \return value stored in long long type; the content written to Rd
  22610. */
  22611. __STATIC_FORCEINLINE long long __RV_DSMDRS32(unsigned long long a, unsigned long long b)
  22612. {
  22613. long long result;
  22614. __ASM volatile("dsmdrs32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); /* "=r": result is an output-only operand bound to Rd */
  22615. return result;
  22616. }
  22617. /* ===== Inline Function End for DSMDRS32 ===== */
  22618. /* ===== Inline Function Start for DSMXDS32 ===== */
  22619. /**
  22620. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22621. * \brief DSMXDS32 (Two Cross Signed 32x32 with 64-bit Sub)
  22622. * \details
  22623. * **Type**: DSP
  22624. *
  22625. * **Syntax**:\n
  22626. * ~~~
  22627. * DSMXDS32 Rd, Rs1, Rs2
  22628. * ~~~
  22629. *
  22630. * **Purpose**:\n
  22631. * Do two cross signed 32x32 multiplications and subtract the (bottom x top) signed multiplication result from the (top x bottom) signed multiplication result.
  22632. * The results are written into Rd.
  22633. *
  22634. * **Description**:\n
  22635. * It multiplies the bottom 32-bit element of Rs1 with the top 32-bit
  22636. * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
  22637. * Rs1 with the bottom 32-bit element of Rs2.
  22638. *
  22639. * **Operations**:\n
  22640. * ~~~
  22641. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  22642. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  22643. *
  22644. * t01 = op1b s* op2t;
  22645. * t10 = op1t s* op2b;
  22646. * Rd = t10 - t01;
  22647. * x=0
  22648. * ~~~
  22649. *
  22650. * \param [in] a unsigned long long type of value stored in a; first source operand (Rs1, two packed 32-bit words)
  22651. * \param [in] b unsigned long long type of value stored in b; second source operand (Rs2, two packed 32-bit words)
  22652. * \return value stored in long long type; the content written to Rd
  22653. */
  22654. __STATIC_FORCEINLINE long long __RV_DSMXDS32(unsigned long long a, unsigned long long b)
  22655. {
  22656. long long result;
  22657. __ASM volatile("dsmxds32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); /* "=r": result is an output-only operand bound to Rd */
  22658. return result;
  22659. }
  22660. /* ===== Inline Function End for DSMXDS32 ===== */
  22661. /* ===== Inline Function Start for DSMALDA ===== */
  22662. /**
  22663. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22664. * \brief DSMALDA (Four Signed 16x16 with 64-bit Add)
  22665. * \details
  22666. * **Type**: DSP
  22667. *
  22668. * **Syntax**:\n
  22669. * ~~~
  22670. * DSMALDA Rd, Rs1, Rs2
  22671. * ~~~
  22672. *
  22673. * **Purpose**:\n
  22674. * Do four signed 16x16 multiplications and add the signed multiplication results and a third register. The results are written into Rd.
  22675. *
  22676. * **Description**:\n
  22677. * For each 32-bit chunk, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
  22678. * content of Rs2 and then adds the result to the result of multiplying the top 16-bit content of Rs1 with
  22679. * the top 16-bit content of Rs2 with unlimited precision. The four products are added to the 64-bit content of Rd.
  22680. *
  22681. * **Operations**:\n
  22682. * ~~~
  22683. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  22684. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  22685. *
  22686. * m0 = op1b.H[0] s* op2b.H[0];
  22687. * m1 = op1b.H[1] s* op2b.H[1];
  22688. * m2 = op1t.H[0] s* op2t.H[0];
  22689. * m3 = op1t.H[1] s* op2t.H[1];
  22690. *
  22691. * Rd = Rd + m0 + m1 + m2 + m3;
  22692. * x=0
  22693. * ~~~
  22694. *
  22695. * \param [in] t long long type of value stored in t; 64-bit accumulator passed in Rd
  22696. * \param [in] a unsigned long long type of value stored in a; first source operand (Rs1, four packed 16-bit halfwords)
  22697. * \param [in] b unsigned long long type of value stored in b; second source operand (Rs2, four packed 16-bit halfwords)
  22698. * \return value stored in long long type; the updated content of Rd
  22699. */
  22700. __STATIC_FORCEINLINE long long __RV_DSMALDA(long long t, unsigned long long a, unsigned long long b)
  22701. {
  22702. __ASM volatile("dsmalda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); /* "+r": t is both the Rd input (accumulator) and the result */
  22703. return t;
  22704. }
  22705. /* ===== Inline Function End for DSMALDA ===== */
  22706. /* ===== Inline Function Start for DSMALXDA ===== */
  22707. /**
  22708. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22709. * \brief DSMALXDA (Four Cross Signed 16x16 with 64-bit Add)
  22710. * \details
  22711. * **Type**: DSP
  22712. *
  22713. * **Syntax**:\n
  22714. * ~~~
  22715. * DSMALXDA Rd, Rs1, Rs2
  22716. * ~~~
  22717. *
  22718. * **Purpose**:\n
  22719. * Do four cross signed 16x16 multiplications and add the signed multiplication results and a third register. The results are written into Rd.
  22720. *
  22721. * **Description**:\n
  22722. * For each 32-bit chunk, it multiplies the top 16-bit content of Rs1 with the bottom 16-bit
  22723. * content of Rs2 and then adds the result to the result of multiplying the bottom 16-bit content of Rs1
  22724. * with the top 16-bit content of Rs2 with unlimited precision. The four products are added to the 64-bit content of Rd.
  22725. *
  22726. * **Operations**:\n
  22727. * ~~~
  22728. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  22729. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  22730. *
  22731. * m0 = op1b.H[0] s* op2b.H[1];
  22732. * m1 = op1b.H[1] s* op2b.H[0];
  22733. * m2 = op1t.H[0] s* op2t.H[1];
  22734. * m3 = op1t.H[1] s* op2t.H[0];
  22735. *
  22736. * Rd = Rd + m0 + m1 + m2 + m3;
  22737. * x=0
  22738. * ~~~
  22739. *
  22740. * \param [in] t long long type of value stored in t; 64-bit accumulator passed in Rd
  22741. * \param [in] a unsigned long long type of value stored in a; first source operand (Rs1, four packed 16-bit halfwords)
  22742. * \param [in] b unsigned long long type of value stored in b; second source operand (Rs2, four packed 16-bit halfwords)
  22743. * \return value stored in long long type; the updated content of Rd
  22744. */
  22745. __STATIC_FORCEINLINE long long __RV_DSMALXDA(long long t, unsigned long long a, unsigned long long b)
  22746. {
  22747. __ASM volatile("dsmalxda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); /* "+r": t is both the Rd input (accumulator) and the result */
  22748. return t;
  22749. }
  22750. /* ===== Inline Function End for DSMALXDA ===== */
  22751. /* ===== Inline Function Start for DSMALDS ===== */
  22752. /**
  22753. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22754. * \brief DSMALDS (Four Signed 16x16 with 64-bit Add and Sub)
  22755. * \details
  22756. * **Type**: DSP
  22757. *
  22758. * **Syntax**:\n
  22759. * ~~~
  22760. * DSMALDS Rd, Rs1, Rs2
  22761. * ~~~
  22762. *
  22763. * **Purpose**:\n
  22764. * Do four signed 16x16 multiplications, add the top and subtract the bottom signed multiplication results of each 32-bit chunk, and add a third register. The results are written into Rd.
  22765. *
  22766. * **Description**:\n
  22767. * For each 32-bit chunk, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
  22768. * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
  22769. * Rs1 with the top 16-bit content of Rs2. The two differences are added to the 64-bit content of Rd.
  22770. *
  22771. * **Operations**:\n
  22772. * ~~~
  22773. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  22774. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  22775. *
  22776. * m0 = op1b.H[1] s* op2b.H[1];
  22777. * m1 = op1b.H[0] s* op2b.H[0];
  22778. * m2 = op1t.H[1] s* op2t.H[1];
  22779. * m3 = op1t.H[0] s* op2t.H[0];
  22780. *
  22781. * Rd = Rd + m0 - m1 + m2 - m3;
  22782. * x=0
  22783. * ~~~
  22784. *
  22785. * \param [in] t long long type of value stored in t; 64-bit accumulator passed in Rd
  22786. * \param [in] a unsigned long long type of value stored in a; first source operand (Rs1, four packed 16-bit halfwords)
  22787. * \param [in] b unsigned long long type of value stored in b; second source operand (Rs2, four packed 16-bit halfwords)
  22788. * \return value stored in long long type; the updated content of Rd
  22789. */
  22790. __STATIC_FORCEINLINE long long __RV_DSMALDS(long long t, unsigned long long a, unsigned long long b)
  22791. {
  22792. __ASM volatile("dsmalds %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); /* "+r": t is both the Rd input (accumulator) and the result */
  22793. return t;
  22794. }
  22795. /* ===== Inline Function End for DSMALDS ===== */
  22796. /* ===== Inline Function Start for DSMALDRS ===== */
  22797. /**
  22798. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22799. * \brief DSMALDRS (Four Signed 16x16 with 64-bit Add and Reversed Sub)
  22800. * \details
  22801. * **Type**: DSP
  22802. *
  22803. * **Syntax**:\n
  22804. * ~~~
  22805. * DSMALDRS Rd, Rs1, Rs2
  22806. * ~~~
  22807. *
  22808. * **Purpose**:\n
  22809. * Do four signed 16x16 multiplications, add the bottom and subtract the top signed multiplication results of each 32-bit chunk, and add a third register. The results are written into Rd.
  22810. *
  22811. * **Description**:\n
  22812. * For each 32-bit chunk, it multiplies the top 16-bit content of Rs1 with the top 16-bit content
  22813. * of Rs2 and then subtracts the result from the result of multiplying the bottom 16-bit content of Rs1
  22814. * with the bottom 16-bit content of Rs2. The two differences are added to the 64-bit content of Rd.
  22815. *
  22816. * **Operations**:\n
  22817. * ~~~
  22818. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  22819. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  22820. *
  22821. * m0 = op1b.H[0] s* op2b.H[0];
  22822. * m1 = op1b.H[1] s* op2b.H[1];
  22823. * m2 = op1t.H[0] s* op2t.H[0];
  22824. * m3 = op1t.H[1] s* op2t.H[1];
  22825. *
  22826. * Rd = Rd + m0 - m1 + m2 - m3;
  22827. * x=0
  22828. * ~~~
  22829. *
  22830. * \param [in] t long long type of value stored in t; 64-bit accumulator passed in Rd
  22831. * \param [in] a unsigned long long type of value stored in a; first source operand (Rs1, four packed 16-bit halfwords)
  22832. * \param [in] b unsigned long long type of value stored in b; second source operand (Rs2, four packed 16-bit halfwords)
  22833. * \return value stored in long long type; the updated content of Rd
  22834. */
  22835. __STATIC_FORCEINLINE long long __RV_DSMALDRS(long long t, unsigned long long a, unsigned long long b)
  22836. {
  22837. __ASM volatile("dsmaldrs %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); /* "+r": t is both the Rd input (accumulator) and the result */
  22838. return t;
  22839. }
  22840. /* ===== Inline Function End for DSMALDRS ===== */
  22841. /* ===== Inline Function Start for DSMALXDS ===== */
  22842. /**
  22843. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22844. * \brief DSMALXDS (Four Cross Signed 16x16 with 64-bit Add and Sub)
  22845. * \details
  22846. * **Type**: DSP
  22847. *
  22848. * **Syntax**:\n
  22849. * ~~~
  22850. * DSMALXDS Rd, Rs1, Rs2
  22851. * ~~~
  22852. *
  22853. * **Purpose**:\n
  22854. * Do four cross signed 16x16 multiplications, add the (top x bottom) and subtract the (bottom x top) signed multiplication results of each 32-bit chunk, and add a third register. The results are written into Rd.
  22855. *
  22856. * **Description**:\n
  22857. * For each 32-bit chunk, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit
  22858. * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
  22859. * Rs1 with the bottom 16-bit content of Rs2. The two differences are added to the 64-bit content of Rd.
  22860. *
  22861. * **Operations**:\n
  22862. * ~~~
  22863. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  22864. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  22865. *
  22866. * m0 = op1b.H[1] s* op2b.H[0];
  22867. * m1 = op1b.H[0] s* op2b.H[1];
  22868. * m2 = op1t.H[1] s* op2t.H[0];
  22869. * m3 = op1t.H[0] s* op2t.H[1];
  22870. *
  22871. * Rd = Rd + m0 - m1 + m2 - m3;
  22872. * x=0
  22873. * ~~~
  22874. *
  22875. * \param [in] t long long type of value stored in t; 64-bit accumulator passed in Rd
  22876. * \param [in] a unsigned long long type of value stored in a; first source operand (Rs1, four packed 16-bit halfwords)
  22877. * \param [in] b unsigned long long type of value stored in b; second source operand (Rs2, four packed 16-bit halfwords)
  22878. * \return value stored in long long type; the updated content of Rd
  22879. */
  22880. __STATIC_FORCEINLINE long long __RV_DSMALXDS(long long t, unsigned long long a, unsigned long long b)
  22881. {
  22882. __ASM volatile("dsmalxds %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); /* "+r": t is both the Rd input (accumulator) and the result */
  22883. return t;
  22884. }
  22885. /* ===== Inline Function End for DSMALXDS ===== */
  22886. /* ===== Inline Function Start for DSMSLDA ===== */
  22887. /**
  22888. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22889. * \brief DSMSLDA (Four Signed 16x16 with 64-bit Sub)
  22890. * \details
  22891. * **Type**: DSP
  22892. *
  22893. * **Syntax**:\n
  22894. * ~~~
  22895. * DSMSLDA Rd, Rs1, Rs2
  22896. * ~~~
  22897. *
  22898. * **Purpose**:\n
  22899. * Do four signed 16x16 and subtraction signed multiplication results and add a third register. The results are written into Rd.
  22900. *
  22901. * **Description**:\n
  22902. * It multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
  22903. * content Rs2 and multiplies the top 16-bit content of Rs1 with the top 16-bit content of Rs2.
  22904. *
  22905. * **Operations**:\n
  22906. * ~~~
  22907. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  22908. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  22909. *
  22910. * m0 = op1b.H[0] s* op2b.H[0];
  22911. * m1 = op1b.H[1] s* op2b.H[1];
  22912. * m2 = op1t.H[0] s* op2t.H[0];
  22913. * m3 = op1t.H[1] s* op2t.H[1];
  22914. *
  22915. * Rd = Rd - m0 - m1 - m2 - m3;
  22916. * x=0
  22917. * ~~~
  22918. *
  22919. * \param [in] t long long type of value stored in t
  22920. * \param [in] a unsigned long long type of value stored in a
  22921. * \param [in] b unsigned long long type of value stored in b
  22922. * \return value stored in long long type
  22923. */
  22924. __STATIC_FORCEINLINE long long __RV_DSMSLDA(long long t, unsigned long long a, unsigned long long b)
  22925. {
  22926. __ASM volatile("dsmslda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  22927. return t;
  22928. }
  22929. /* ===== Inline Function End for DSMSLDA ===== */
  22930. /* ===== Inline Function Start for DSMSLXDA ===== */
  22931. /**
  22932. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22933. * \brief DSMSLXDA (Four Cross Signed 16x16 with 64-bit Sub)
  22934. * \details
  22935. * **Type**: DSP
  22936. *
  22937. * **Syntax**:\n
  22938. * ~~~
  22939. * DSMSLXDA Rd, Rs1, Rs2
  22940. * ~~~
  22941. *
  22942. * **Purpose**:\n
  22943. * Do four signed 16x16 and subtraction signed multiplication results and add a third register. The results are written into Rd.
  22944. *
  22945. * **Description**:\n
  22946. * It multiplies the top 16-bit content of Rs1 with the bottom 16-bit
  22947. * content of Rs2 and multiplies the bottom 16-bit content of Rs1 with the top 16-bit content of Rs2.
  22948. *
  22949. * **Operations**:\n
  22950. * ~~~
  22951. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  22952. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  22953. *
  22954. * m0 = op1b.H[0] s* op2b.H[1];
  22955. * m1 = op1b.H[1] s* op2b.H[0];
  22956. * m2 = op1t.H[0] s* op2t.H[1];
  22957. * m3 = op1t.H[1] s* op2t.H[0];
  22958. *
  22959. * Rd = Rd - m0 - m1 - m2 - m3;
  22960. * x=0
  22961. * ~~~
  22962. *
  22963. * \param [in] t long long type of value stored in t
  22964. * \param [in] a unsigned long long type of value stored in a
  22965. * \param [in] b unsigned long long type of value stored in b
  22966. * \return value stored in long long type
  22967. */
  22968. __STATIC_FORCEINLINE long long __RV_DSMSLXDA(long long t, unsigned long long a, unsigned long long b)
  22969. {
  22970. __ASM volatile("dsmslxda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  22971. return t;
  22972. }
  22973. /* ===== Inline Function End for DSMSLXDA ===== */
  22974. /* ===== Inline Function Start for DDSMAQA ===== */
  22975. /**
  22976. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22977. * \brief DDSMAQA (Eight Signed 8x8 with 64-bit Add)
  22978. * \details
  22979. * **Type**: DSP
  22980. *
  22981. * **Syntax**:\n
  22982. * ~~~
  22983. * DDSMAQA Rd, Rs1, Rs2
  22984. * ~~~
  22985. *
  22986. * **Purpose**:\n
  22987. * Do eight signed 8x8 and add signed multiplication results and a third register. The results are written into Rd.
  22988. *
  22989. * **Description**:\n
  22990. * Do eight signed 8-bit multiplications from eight 8-bit chunks of two registers; and then adds
  22991. * the eight 16-bit results and the content of 64-bit chunks of a third register.
  22992. *
  22993. * **Operations**:\n
  22994. * ~~~
  22995. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  22996. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  22997. *
  22998. * m0 = op1b.B[0] s* op2b.B[0];
  22999. * m1 = op1b.B[1] s* op2b.B[1];
  23000. * m2 = op1b.B[2] s* op2b.B[2];
  23001. * m3 = op1b.B[3] s* op2b.B[3];
  23002. * m4 = op1t.B[0] s* op2t.B[0];
  23003. * m5 = op1t.B[1] s* op2t.B[1];
  23004. * m6 = op1t.B[2] s* op2t.B[2];
  23005. * m7 = op1t.B[3] s* op2t.B[3];
  23006. *
  23007. * s0 = m0 + m1 + m2 + m3;
  23008. * s1 = m4 + m5 + m6 + m7;
  23009. * Rd = Rd + s0 + s1;
  23010. * x=0
  23011. * ~~~
  23012. *
  23013. * \param [in] t long long type of value stored in t
  23014. * \param [in] a unsigned long long type of value stored in a
  23015. * \param [in] b unsigned long long type of value stored in b
  23016. * \return value stored in long long type
  23017. */
  23018. __STATIC_FORCEINLINE long long __RV_DDSMAQA(long long t, unsigned long long a, unsigned long long b)
  23019. {
  23020. __ASM volatile("ddsmaqa %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  23021. return t;
  23022. }
  23023. /* ===== Inline Function End for DDSMAQA ===== */
  23024. /* ===== Inline Function Start for DDSMAQA.SU ===== */
  23025. /**
  23026. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  23027. * \brief DDSMAQA.SU (Eight Signed 8 x Unsigned 8 with 64-bit Add)
  23028. * \details
  23029. * **Type**: DSP
  23030. *
  23031. * **Syntax**:\n
  23032. * ~~~
  23033. * DDSMAQA.SU Rd, Rs1, Rs2
  23034. * ~~~
  23035. *
  23036. * **Purpose**:\n
  23037. * Do eight signed 8 x unsigned 8 and add signed multiplication results and a third register. The results are written into Rd.
  23038. *
  23039. * **Description**:\n
  23040. * Do eight signed 8 x unsigned 8 and add signed multiplication results and a third register; and then adds
  23041. * the eight 16-bit results and the content of 64-bit chunks of a third register.
  23042. *
  23043. * **Operations**:\n
  23044. * ~~~
  23045. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  23046. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  23047. *
  23048. * m0 = op1b.B[0] su* op2b.B[0];
  23049. * m1 = op1b.B[1] su* op2b.B[1];
  23050. * m2 = op1b.B[2] su* op2b.B[2];
  23051. * m3 = op1b.B[3] su* op2b.B[3];
  23052. * m4 = op1t.B[0] su* op2t.B[0];
  23053. * m5 = op1t.B[1] su* op2t.B[1];
  23054. * m6 = op1t.B[2] su* op2t.B[2];
  23055. * m7 = op1t.B[3] su* op2t.B[3];
  23056. *
  23057. * s0 = m0 + m1 + m2 + m3;
  23058. * s1 = m4 + m5 + m6 + m7;
  23059. * Rd = Rd + s0 + s1;
  23060. * x=0
  23061. * ~~~
  23062. *
  23063. * \param [in] t long long type of value stored in t
  23064. * \param [in] a unsigned long long type of value stored in a
  23065. * \param [in] b unsigned long long type of value stored in b
  23066. * \return value stored in long long type
  23067. */
  23068. __STATIC_FORCEINLINE long long __RV_DDSMAQA_SU(long long t, unsigned long long a, unsigned long long b)
  23069. {
  23070. __ASM volatile("ddsmaqa.su %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  23071. return t;
  23072. }
  23073. /* ===== Inline Function End for DDSMAQA.SU ===== */
  23074. /* ===== Inline Function Start for DDUMAQA ===== */
  23075. /**
  23076. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  23077. * \brief DDUMAQA (Eight Unsigned 8x8 with 64-bit Unsigned Add)
  23078. * \details
  23079. * **Type**: DSP
  23080. *
  23081. * **Syntax**:\n
  23082. * ~~~
  23083. * DDUMAQA Rd, Rs1, Rs2
  23084. * ~~~
  23085. *
  23086. * **Purpose**:\n
  23087. * Do eight unsigned 8x8 and add unsigned multiplication results and a third register. The results are written into Rd.
  23088. *
  23089. * **Description**:\n
  23090. * Do eight unsigned 8x8 and add unsigned multiplication results and a third register; and then adds
  23091. * the eight 16-bit results and the content of 64-bit chunks of a third register.
  23092. *
  23093. * **Operations**:\n
  23094. * ~~~
  23095. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  23096. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  23097. *
  23098. * m0 = op1b.B[0] u* op2b.B[0];
  23099. * m1 = op1b.B[1] u* op2b.B[1];
  23100. * m2 = op1b.B[2] u* op2b.B[2];
  23101. * m3 = op1b.B[3] u* op2b.B[3];
  23102. * m4 = op1t.B[0] u* op2t.B[0];
  23103. * m5 = op1t.B[1] u* op2t.B[1];
  23104. * m6 = op1t.B[2] u* op2t.B[2];
  23105. * m7 = op1t.B[3] u* op2t.B[3];
  23106. *
  23107. * s0 = m0 + m1 + m2 + m3;
  23108. * s1 = m4 + m5 + m6 + m7;
  23109. * Rd = Rd + s0 + s1;
  23110. * x=0
  23111. * ~~~
  23112. *
  23113. * \param [in] t long long type of value stored in t
  23114. * \param [in] a unsigned long long type of value stored in a
  23115. * \param [in] b unsigned long long type of value stored in b
  23116. * \return value stored in long long type
  23117. */
  23118. __STATIC_FORCEINLINE long long __RV_DDUMAQA(long long t, unsigned long long a, unsigned long long b)
  23119. {
  23120. __ASM volatile("ddumaqa %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  23121. return t;
  23122. }
  23123. /* ===== Inline Function End for DDUMAQA ===== */
  23124. /* ===== Inline Function Start for DSMA32.u ===== */
  23125. /**
  23126. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  23127. * \brief DSMA32.u (64-bit SIMD 32-bit Signed Multiply Addition With Rounding and Clip)
  23128. * \details
  23129. * **Type**: DSP
  23130. *
  23131. * **Syntax**:\n
  23132. * ~~~
  23133. * DSMA32.u Rd, Rs1, Rs2
  23134. * ~~~
  23135. *
  23136. * **Purpose**:\n
  23137. * Do two signed 32x32 and add signed multiplication results with Rounding, then right shift 32-bit and clip q63 to q31.
  23138. * The result is written to Rd.
  23139. *
  23140. * **Description**:\n
  23141. * For the `DSMA32.u` instruction, multiply the top 32-bit Q31 content of 64-bit chunks in Rs1 with the top 32-bit Q31
  23142. * content of 64-bit chunks in Rs2. At the same time, multiply the bottom 32-bit Q31 content of 64-bit chunks in Rs1 with
  23143. * the bottom 32-bit Q31 content of 64-bit chunks in Rs2.
  23144. * Then, do the addtion for the results above and perform the addtional rounding operations, and then move the data to the right
  23145. * by 32-bit, and clip the 64-bit data into 32-bit.The result is written to Rd.
  23146. *
  23147. * **Operations**:\n
  23148. * ~~~
  23149. * Rd = (q31_t)((Rs1.W[x] s* Rs2.W[x] + Rs1.W[x + 1] s* Rs2.W[x + 1] + 0x80000000LL) s>> 32);
  23150. * x=0
  23151. * ~~~
  23152. *
  23153. * \param [in] a unsigned long long type of value stored in a
  23154. * \param [in] b unsigned long long type of value stored in b
  23155. * \return value stored in long type
  23156. */
  23157. __STATIC_FORCEINLINE long __RV_DSMA32_U(unsigned long long a, unsigned long long b)
  23158. {
  23159. long result;
  23160. __ASM volatile("dsma32.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  23161. return result;
  23162. }
  23163. /* ===== Inline Function End for DSMA32.u ===== */
  23164. /* ===== Inline Function Start for DSMXS32.u ===== */
  23165. /**
  23166. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  23167. * \brief DSMXS32.u (64-bit SIMD 32-bit Signed Multiply Cross Subtraction With Rounding and Clip)
  23168. * \details
  23169. * **Type**: DSP
  23170. *
  23171. * **Syntax**:\n
  23172. * ~~~
  23173. * DSMXS32.u Rd, Rs1, Rs2
  23174. * ~~~
  23175. *
  23176. * **Purpose**:\n
  23177. * Do two cross signed 32x32 and sub signed multiplication results with Rounding, then right shift 32-bit and clip q63 to
  23178. * q31. The result is written to Rd.
  23179. *
  23180. * **Description**:\n
  23181. * For the `DSMXS32.u` instruction, multiply the top 32-bit Q31 content of 64-bit chunks in Rs1 with the bottom 32-bit
  23182. * Q31 content of 64-bit chunks in Rs2. At the same time, multiply the bottom 32-bit Q31 content of 64-bit chunks in Rs1
  23183. * with the top 32-bit Q31 content of 64-bit chunks in Rs2.
  23184. * Then, do the subtraction for the results above and perform the addtional rounding operations, and then move the data to the right by
  23185. * 32-bit, and clip the 64-bit data into 32-bit.The result is written to Rd.
  23186. *
  23187. * **Operations**:\n
  23188. * ~~~
  23189. * Rd = (q31_t)((Rs1.W[x + 1] s* Rs2.W[x] - Rs1.W[x] s* Rs2.W[x + 1] + 0x80000000LL) s>> 32);
  23190. * x=0
  23191. * ~~~
  23192. *
  23193. * \param [in] a unsigned long long type of value stored in a
  23194. * \param [in] b unsigned long long type of value stored in b
  23195. * \return value stored in long type
  23196. */
  23197. __STATIC_FORCEINLINE long __RV_DSMXS32_U(unsigned long long a, unsigned long long b)
  23198. {
  23199. long result;
  23200. __ASM volatile("dsmxs32.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  23201. return result;
  23202. }
  23203. /* ===== Inline Function End for DSMXS32.u ===== */
  23204. /* ===== Inline Function Start for DSMXA32.u ===== */
  23205. /**
  23206. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  23207. * \brief DSMXA32.u (64-bit SIMD 32-bit Signed Cross Multiply Addition with Rounding and Clip)
  23208. * \details
  23209. * **Type**: DSP
  23210. *
  23211. * **Syntax**:\n
  23212. * ~~~
  23213. * DSMXA32.u Rd, Rs1, Rs2
  23214. * ~~~
  23215. *
  23216. * **Purpose**:\n
  23217. * Do two cross signed 32x32 and add signed multiplication results with Rounding, then right shift 32-bit and clip q63 to
  23218. * q31. The result is written to Rd.
  23219. *
  23220. * **Description**:\n
  23221. * For the `DSMXA32.u` instruction,multiply the top 32-bit Q31 content of 64-bit chunks in Rs1 with the bottom 32-bit Q31
  23222. * content of 64-bit chunks in Rs2. At the same time, multiply the bottom 32-bit Q31 content of 64-bit chunks in Rs1 with
  23223. * the top 32-bit Q31 content of 64-bit chunks in Rs2.
  23224. * Then, do the addtion for the results above and perform the addtional rounding operations, and then move the data to the right
  23225. * by 32-bit, and clip the 64-bit data into 32-bit.The result is written to Rd.
  23226. *
  23227. * **Operations**:\n
  23228. * ~~~
  23229. * Rd = (q31_t)((Rs1.W[x + 1] s* Rs2.W[x] + Rs1.W[x] s* Rs2.W[x + 1] + 0x80000000LL) s>> 32);
  23230. * x=0
  23231. * ~~~
  23232. *
  23233. * \param [in] a unsigned long long type of value stored in a
  23234. * \param [in] b unsigned long long type of value stored in b
  23235. * \return value stored in long type
  23236. */
  23237. __STATIC_FORCEINLINE long __RV_DSMXA32_U(unsigned long long a, unsigned long long b)
  23238. {
  23239. long result;
  23240. __ASM volatile("dsmxa32.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  23241. return result;
  23242. }
  23243. /* ===== Inline Function End for DSMXA32.u ===== */
  23244. /* ===== Inline Function Start for DSMS32.u ===== */
  23245. /**
  23246. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  23247. * \brief DSMS32.u (64-bit SIMD 32-bit Signed Multiply Subtraction with Rounding and Clip)
  23248. * \details
  23249. * **Type**: DSP
  23250. *
  23251. * **Syntax**:\n
  23252. * ~~~
  23253. * DSMS32.u Rd, Rs1, Rs2
  23254. * ~~~
  23255. *
  23256. * **Purpose**:\n
  23257. * Do two signed 32x32 and sub signed multiplication results with Rounding, then right shift 32-bit and clip q63 to q31. The
  23258. * result is written to Rd.
  23259. *
  23260. * **Description**:\n
  23261. * For the `DSMS32.u` instruction, multiply the bottom 32-bit Q31 content of 64-bit chunks in Rs1 with the bottom 32-bit
  23262. * Q31 content of 64-bit chunks in Rs2. At the same time, multiply the top 32-bit Q31 content of 64-bit chunks in Rs1 with
  23263. * the top 32-bit Q31 content of 64-bit chunks in Rs2.
  23264. * Then, do the subtraction for the results above and perform the addtional rounding operations, and then move the data to the right by
  23265. * 32-bit, and clip the 64-bit data into 32-bit.The result is written to Rd.
  23266. *
  23267. * **Operations**:\n
  23268. * ~~~
  23269. * Rd = (q31_t)((Rs1.W[x] s* Rs2.W[x] - Rs1.W[x + 1] s* Rs2.W[x + 1] + 0x80000000LL) s>> 32);
  23270. * x=0
  23271. * ~~~
  23272. *
  23273. * \param [in] a unsigned long long type of value stored in a
  23274. * \param [in] b unsigned long long type of value stored in b
  23275. * \return value stored in long type
  23276. */
  23277. __STATIC_FORCEINLINE long __RV_DSMS32_U(unsigned long long a, unsigned long long b)
  23278. {
  23279. long result;
  23280. __ASM volatile("dsms32.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  23281. return result;
  23282. }
  23283. /* ===== Inline Function End for DSMS32.u ===== */
  23284. /* ===== Inline Function Start for DSMADA16 ===== */
  23285. /**
  23286. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  23287. * \brief DSMADA16 (Signed Multiply Two Halfs and Two Adds 32-bit)
  23288. * \details
  23289. * **Type**: SIMD
  23290. *
  23291. * **Syntax**:\n
  23292. * ~~~
  23293. * DSMADA16 Rd, Rs1, Rs2
  23294. * ~~~
  23295. *
  23296. * **Purpose**:\n
  23297. * Do two signed 16-bit multiplications of two 32-bit registers; and then adds the 32-bit results and the 32-bit value of an
  23298. * even/odd pair of registers together.
  23299. * * DSMADA16: rt pair+ top*top + bottom*bottom
  23300. *
  23301. * **Description**:\n
  23302. * This instruction multiplies the per 16-bit content of the 32-bit elements of Rs1 with the corresponding 16-bit content of
  23303. * the 32-bit elements of Rs2. The result is added to the 32-bit value of an even/odd pair of registers specified by Rd(4,1).
  23304. * The 32-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and Rs2, and the 32-bit value of the
  23305. * register-pair are treated as signed integers.
  23306. *
  23307. * **Operations**:\n
  23308. * ~~~
  23309. * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
  23310. * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
  23311. * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
  23312. * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
  23313. * Rd.W = Rd.W + SE32(Mres0[0][31:0]) + SE32(Mres1[0][31:0]) + SE32(Mres0[1][31:0]) + SE32(Mres1[1][31:0]);
  23314. * ~~~
  23315. *
  23316. * \param [in] t long long type of value stored in t
  23317. * \param [in] a unsigned long long type of value stored in a
  23318. * \param [in] b unsigned long long type of value stored in b
  23319. * \return value stored in long type
  23320. */
  23321. __STATIC_FORCEINLINE long __RV_DSMADA16(long long t, unsigned long long a, unsigned long long b)
  23322. {
  23323. __ASM volatile("dsmada16 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  23324. return (long)t;
  23325. }
  23326. /* ===== Inline Function End for DSMADA16 ===== */
  23327. /* ===== Inline Function Start for DSMAXDA16 ===== */
  23328. /**
  23329. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  23330. * \brief DSMAXDA16 (Signed Crossed Multiply Two Halfs and Two Adds 32-bit)
  23331. * \details
  23332. * **Type**: SIMD
  23333. *
  23334. * **Syntax**:\n
  23335. * ~~~
  23336. * DSMAXDA16 Rd, Rs1, Rs2
  23337. * ~~~
  23338. *
  23339. * **Purpose**:\n
  23340. * Do two signed 16-bit multiplications of two 32-bit registers; and then adds the 32-bit results and the 32-bit value of an
  23341. * even/odd pair of registers together.
  23342. * * DSMAXDA: rt pair+ top*bottom + bottom*top (all 32-bit elements)
  23343. *
  23344. * **Description**:\n
  23345. * This instruction crossly multiplies the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of the 32-bit
  23346. * elements of Rs2 and then adds the result to the result of multiplying the bottom 16-bit content of the 32-bit elements of
  23347. * Rs1 with the top 16-bit content of the 32-bit elements of Rs2 with unlimited precision. The result is added to the 64-bit
  23348. * value of an even/odd pair of registers specified by Rd(4,1).The 64-bit addition result is clipped to 32-bit result.
  23349. *
  23350. * **Operations**:\n
  23351. * ~~~
  23352. * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
  23353. * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]);
  23354. * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
  23355. * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]);
  23356. * Rd.W = Rd.W + SE32(Mres0[0][31:0]) + SE32(Mres1[0][31:0]) + SE32(Mres0[1][31:0]) + SE32(Mres1[1][31:0]);
  23357. * ~~~
  23358. *
  23359. * \param [in] t long long type of value stored in t
  23360. * \param [in] a unsigned long long type of value stored in a
  23361. * \param [in] b unsigned long long type of value stored in b
  23362. * \return value stored in long type
  23363. */
  23364. __STATIC_FORCEINLINE long __RV_DSMAXDA16(long long t, unsigned long long a, unsigned long long b)
  23365. {
  23366. __ASM volatile("dsmaxda16 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  23367. return (long)t;
  23368. }
  23369. /* ===== Inline Function End for DSMAXDA16 ===== */
  23370. /* ===== Inline Function Start for DKSMS32.u ===== */
  23371. /**
  23372. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  23373. * \brief DKSMS32.u (Two Signed Multiply Shift-clip and Saturation with Rounding)
  23374. * \details
  23375. * **Type**: SIMD
  23376. *
  23377. * **Syntax**:\n
  23378. * ~~~
  23379. * DKSMS32.u Rd, Rs1, Rs2
  23380. * ~~~
  23381. *
  23382. * **Purpose**:\n
  23383. * Computes saturated multiplication of two pairs of q31 type with shifted rounding.
  23384. *
  23385. * **Description**:\n
  23386. * Compute the multiplication of Rs1 and Rs2 of type q31_t, intercept [47:16] for the resulting 64-bit product
  23387. * to get the 32-bit number, then add 1 to it to do rounding, and finally saturate the result after rounding.
  23388. *
  23389. * **Operations**:\n
  23390. * ~~~
  23391. * Mres[x][63:0] = Rs1.W[x] s* Rs2.W[x];
  23392. * Round[x][32:0] = Mres[x][47:15] + 1;
  23393. * Rd.W[x] = sat.31(Rd.W[x] + Round[x][32:1]);
  23394. * x=1...0
  23395. * ~~~
  23396. *
  23397. * \param [in] t unsigned long long type of value stored in t
  23398. * \param [in] a unsigned long long type of value stored in a
  23399. * \param [in] b unsigned long long type of value stored in b
  23400. * \return value stored in unsigned long long type
  23401. */
  23402. __STATIC_FORCEINLINE unsigned long long __RV_DKSMS32_U(unsigned long long t, unsigned long long a, unsigned long long b)
  23403. {
  23404. __ASM volatile("dksms32.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  23405. return t;
  23406. }
  23407. /* ===== Inline Function End for DKSMS32.u ===== */
  23408. /* ===== Inline Function Start for DMADA32 ===== */
  23409. /**
  23410. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  23411. * \brief DMADA32 ((Two Cross Signed 32x32 with 64-bit Add and Clip to 32-bit)
  23412. * \details
  23413. * **Type**: SIMD
  23414. *
  23415. * **Syntax**:\n
  23416. * ~~~
  23417. * DMADA32 Rd, Rs1, Rs2
  23418. * ~~~
  23419. *
  23420. * **Purpose**:\n
  23421. * Do two cross signed 32x32 and add the signed multiplication results to q63, then clip the q63 result to q31 , the final results
  23422. * are written into Rd.
  23423. *
  23424. * **Description**:\n
  23425. * For the `DMADA32` instruction, it multiplies the top 32-bit element in Rs1 with the bottom 32-bit element in Rs2 and
  23426. * then adds the result to the result of multiplying the bottom 32-bit element in Rs1 with the top 32-bit element in Rs2, then
  23427. * clip the q63 result to q31.
  23428. *
  23429. * **Operations**:\n
  23430. * ~~~
  23431. * res = (q31_t)((((q63_t) Rd.w[0] << 32) + (q63_t)Rs1.w[0] s* Rs2.w[1] + (q63_t)Rs1.w[1] s* Rs2.w[0]) s>> 32);
  23432. * rd = res;
  23433. * ~~~
  23434. *
  23435. * \param [in] t long long type of value stored in t
  23436. * \param [in] a unsigned long long type of value stored in a
  23437. * \param [in] b unsigned long long type of value stored in b
  23438. * \return value stored in long type
  23439. */
  23440. __STATIC_FORCEINLINE long __RV_DMADA32(long long t, unsigned long long a, unsigned long long b)
  23441. {
  23442. __ASM volatile("dmada32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  23443. return (long)t;
  23444. }
  23445. /* ===== Inline Function End for DMADA32 ===== */
  23446. /* ===== Inline Function Start for DSMALBB ===== */
  23447. /**
  23448. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  23449. * \brief DSMALBB (Signed Multiply Bottom Halfs & Add 64-bit)
  23450. * \details
  23451. * **Type**: SIMD
  23452. *
  23453. * **Syntax**:\n
  23454. * ~~~
  23455. * DSMALBB Rd, Rs1, Rs2
  23456. * ~~~
  23457. *
  23458. * **Purpose**:\n
  23459. * Multiply the signed 16-bit content of the 32-bit elements of a register with the 16-bit content of the corresponding 32-bit
  23460. * elements of another register and add the results with a 64-bit value of an even/odd pair of registers. The addition result
  23461. * is written back to the register-pair.
  23462. * * DSMALBB: rt pair + bottom*bottom (all 32-bit elements)
  23463. *
  23464. * **Description**:\n
  23465. * For the `DSMALBB` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit content of Rs2.The
  23466. * multiplication results are added with the 64-bit value of Rd. The 64-bit addition result is written back to Rd.
  23467. *
  23468. * **Operations**:\n
  23469. * ~~~
  23470. * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[0];
  23471. * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[0];
  23472. * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
  23473. * ~~~
  23474. *
  23475. * \param [in] t long long type of value stored in t
  23476. * \param [in] a unsigned long long type of value stored in a
  23477. * \param [in] b unsigned long long type of value stored in b
  23478. * \return value stored in long long type
  23479. */
  23480. __STATIC_FORCEINLINE long long __RV_DSMALBB(long long t, unsigned long long a, unsigned long long b)
  23481. {
  23482. __ASM volatile("dsmalbb %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  23483. return t;
  23484. }
  23485. /* ===== Inline Function End for DSMALBB ===== */
  23486. /* ===== Inline Function Start for DSMALBT ===== */
  23487. /**
  23488. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  23489. * \brief DSMALBT (Signed Multiply Bottom Half & Top Half & Add 64-bit)
  23490. * \details
  23491. * **Type**: SIMD
  23492. *
  23493. * **Syntax**:\n
  23494. * ~~~
  23495. * DSMALBT Rd, Rs1, Rs2
  23496. * ~~~
  23497. *
  23498. * **Purpose**:\n
  23499. * Multiply the signed 16-bit content of the 32-bit elements of a register with the 16-bit content of the corresponding 32-bit
  23500. * elements of another register and add the results with a 64-bit value of an even/odd pair of registers. The addition result
  23501. * is written back to the register-pair.
  23502. * * DSMALBT: rt pair + bottom*top (all 32-bit elements)
  23503. *
  23504. * **Description**:\n
  23505. * For the `DSMALBT` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with the top 16-bit
  23506. * content of the 32-bit elements of Rs2.
  23507. * The multiplication results are added with the 64-bit value of Rd. The 64-bit addition result is written back to Rd. The
  23508. * 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed integers
  23509. *
  23510. * **Operations**:\n
  23511. * ~~~
  23512. * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[1];
  23513. * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[1];
  23514. * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
  23515. * ~~~
  23516. *
  23517. * \param [in] t long long type of value stored in t
  23518. * \param [in] a unsigned long long type of value stored in a
  23519. * \param [in] b unsigned long long type of value stored in b
  23520. * \return value stored in long long type
  23521. */
  23522. __STATIC_FORCEINLINE long long __RV_DSMALBT(long long t, unsigned long long a, unsigned long long b)
  23523. {
  23524. __ASM volatile("dsmalbt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  23525. return t;
  23526. }
  23527. /* ===== Inline Function End for DSMALBT ===== */
  23528. /* ===== Inline Function Start for DSMALTT ===== */
  23529. /**
  23530. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  23531. * \brief DSMALTT (Signed Multiply Top Half & Add 64-bit)
  23532. * \details
  23533. * **Type**: SIMD
  23534. *
  23535. * **Syntax**:\n
  23536. * ~~~
  23537. * DSMALTT Rd, Rs1, Rs2
  23538. * ~~~
  23539. *
  23540. * **Purpose**:\n
  23541. * Multiply the signed 16-bit content of the 32-bit elements of a register with the 16-bit content of the corresponding 32-bit
  23542. * elements of another register and add the results with a 64-bit value of an even/odd pair of registers. The addition result
  23543. * is written back to the register-pair.
  23544. * * DSMALTT: DSMALTT rt pair + top*top (all 32-bit elements)
  23545. *
  23546. * **Description**:\n
  23547. * For the `DSMALTT` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit
  23548. * content of the 32-bit elements of Rs2.
  23549. * The multiplication results are added with the 64-bit value of Rd. The 64-bit addition result is written back to Rd. The
  23550. * 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed integers.
  23551. *
  23552. * **Operations**:\n
  23553. * ~~~
  23554. * Mres[0][31:0] = Rs1.W[0].H[1] * Rs2.W[0].H[1];
  23555. * Mres[1][31:0] = Rs1.W[1].H[1] * Rs2.W[1].H[1];
  23556. * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
  23557. * ~~~
  23558. *
  23559. * \param [in] t long long type of value stored in t
  23560. * \param [in] a unsigned long long type of value stored in a
  23561. * \param [in] b unsigned long long type of value stored in b
  23562. * \return value stored in long long type
  23563. */
  23564. __STATIC_FORCEINLINE long long __RV_DSMALTT(long long t, unsigned long long a, unsigned long long b)
  23565. {
  23566. __ASM volatile("dsmaltt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  23567. return t;
  23568. }
  23569. /* ===== Inline Function End for DSMALTT ===== */
  23570. /* ===== Inline Function Start for DKMABB32 ===== */
  23571. /**
  23572. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  23573. * \brief DKMABB32 (Saturating Signed Multiply Bottom Words & Add)
  23574. * \details
  23575. * **Type**: SIMD
  23576. *
  23577. * **Syntax**:\n
  23578. * ~~~
  23579. * DKMABB32 Rd, Rs1, Rs2
  23580. * ~~~
  23581. *
  23582. * **Purpose**:\n
  23583. * Multiply the signed 32-bit element in a register with the 32-bit element in another register and add the result to the content
  23584. * of 64-bit data in the third register. The addition result may besaturated and is written to the third register.
  23585. * * DKMABB32: rd + bottom*bottom
  23586. *
  23587. * **Description**:\n
  23588. * For the `DKMABB32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit element in Rs2
  23589. * The multiplication result is added to the content of 64-bit data in Rd. If the addition result is beyond the Q63 number range
  23590. * (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The result after saturation is written to Rd.
  23591. * The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
  23592. *
  23593. * **Operations**:\n
  23594. * ~~~
  23595. * res = Rd + (Rs1.W[0] * Rs2.W[0]);
  23596. * if (res > (2^63)-1) {
  23597. * res = (2^63)-1;
  23598. * OV = 1;
  23599. * } else if (res < -2^63) {
  23600. * res = -2^63;
  23601. * OV = 1;
  23602. * }
  23603. * Rd = res;
  23604. * ~~~
  23605. *
  23606. * \param [in] t long long type of value stored in t
  23607. * \param [in] a unsigned long long type of value stored in a
  23608. * \param [in] b unsigned long long type of value stored in b
  23609. * \return value stored in long long type
  23610. */
  23611. __STATIC_FORCEINLINE long long __RV_DKMABB32(long long t, unsigned long long a, unsigned long long b)
  23612. {
  23613. __ASM volatile("dkmabb32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  23614. return t;
  23615. }
  23616. /* ===== Inline Function End for DKMABB32 ===== */
  23617. /* ===== Inline Function Start for DKMABT32 ===== */
  23618. /**
  23619. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  23620. * \brief DKMABT32 (Saturating Signed Multiply Bottom & Top Words & Add)
  23621. * \details
  23622. * **Type**: SIMD
  23623. *
  23624. * **Syntax**:\n
  23625. * ~~~
  23626. * DKMABT32 Rd, Rs1, Rs2
  23627. * ~~~
  23628. *
  23629. * **Purpose**:\n
  23630. * Multiply the signed 32-bit element in a register with the 32-bit element in another register and add the result to the content
  23631. * of 64-bit data in the third register. The addition result may be saturated and is written to the third register.
  23632. * * DKMABT32: rd + bottom*top
  23633. *
  23634. * **Description**:\n
  23635. * For the `DKMABT32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit element in Rs2
  23636. * The multiplication result is added to the content of 64-bit data in Rd. If the addition result is beyond the Q63 number range
  23637. * (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The result after saturation is written to Rd.
  23638. * The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
  23639. *
  23640. * **Operations**:\n
  23641. * ~~~
  23642. * res = Rd + (Rs1.W[0] * Rs2.W[1]);
  23643. * if (res > (2^63)-1) {
  23644. * res = (2^63)-1;
  23645. * OV = 1;
  23646. * } else if (res < -2^63) {
  23647. * res = -2^63;
  23648. * OV = 1;
  23649. * }
  23650. * Rd = res;
  23651. * ~~~
  23652. *
  23653. * \param [in] t long long type of value stored in t
  23654. * \param [in] a unsigned long long type of value stored in a
  23655. * \param [in] b unsigned long long type of value stored in b
  23656. * \return value stored in long long type
  23657. */
  23658. __STATIC_FORCEINLINE long long __RV_DKMABT32(long long t, unsigned long long a, unsigned long long b)
  23659. {
  23660. __ASM volatile("dkmabt32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  23661. return t;
  23662. }
  23663. /* ===== Inline Function End for DKMABT32 ===== */
  23664. /* ===== Inline Function Start for DKMATT32 ===== */
  23665. /**
  23666. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  23667. * \brief DKMATT32 (Saturating Signed Multiply Bottom & Top Words & Add)
  23668. * \details
  23669. * **Type**: SIMD
  23670. *
  23671. * **Syntax**:\n
  23672. * ~~~
  23673. * DKMATT32 Rd, Rs1, Rs2
  23674. * ~~~
  23675. *
  23676. * **Purpose**:\n
  23677. * Multiply the signed 32-bit element in a register with the 32-bit element in another register and add the result to the content
  23678. * of 64-bit data in the third register. The addition result may be saturated and is written to the third register.
  23679. * * DKMATT32: rd + top*top
  23680. *
  23681. * **Description**:\n
  23682. * For the `DKMATT32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit element in Rs2
  23683. * The multiplication result is added to the content of 64-bit data in Rd. If the addition result is beyond the Q63 number range
  23684. * (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The result after saturation is written to Rd.
  23685. * The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
  23686. *
  23687. * **Operations**:\n
  23688. * ~~~
  23689. * res = Rd + (Rs1.W[1] * Rs2.W[1]);
  23690. * if (res > (2^63)-1) {
  23691. * res = (2^63)-1;
  23692. * OV = 1;
  23693. * } else if (res < -2^63) {
  23694. * res = -2^63;
  23695. * OV = 1;
  23696. * }
  23697. * Rd = res;
  23698. * ~~~
  23699. *
  23700. * \param [in] t long long type of value stored in t
  23701. * \param [in] a unsigned long long type of value stored in a
  23702. * \param [in] b unsigned long long type of value stored in b
  23703. * \return value stored in unsigned long long type
  23704. */
  23705. __STATIC_FORCEINLINE long long __RV_DKMATT32(long long t, unsigned long long a, unsigned long long b)
  23706. {
  23707. __ASM volatile("dkmatt32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  23708. return t;
  23709. }
  23710. /* ===== Inline Function End for DKMATT32 ===== */
  23711. #endif /* __RISCV_XLEN == 32 */
  23712. #elif defined (__ICCRISCV__)
  23713. #if __riscv_xlen == 32
  23714. #include "iar_nds32_intrinsic.h"
  23715. #elif __riscv_xlen == 64
  23716. #include "iar_nds64_intrinsic.h"
  23717. #else
  23718. #error "Unexpected RISC-V XLEN size."
  23719. #endif /* __riscv_xlen == 32 */
  23720. #pragma language=save
  23721. #pragma language=extended
  /*
   * Map the NMSIS __RV_<MNEMONIC> names onto the compatible intrinsics supplied by IAR.
   * Every entry is a plain object-like alias: __RV_<MNEMONIC> expands to __nds__<mnemonic>.
   * The immediate-form names __RV_SRAI8 and __RV_SRAI16 expand to the same intrinsics as
   * __RV_SRA8 and __RV_SRA16.
   */
  #define __RV_CLROV          __nds__clrov
  #define __RV_RDOV           __nds__rdov

  #define __RV_ADD8           __nds__add8
  #define __RV_SUB8           __nds__sub8
  #define __RV_ADD16          __nds__add16
  #define __RV_SUB16          __nds__sub16
  #define __RV_ADD64          __nds__add64
  #define __RV_SUB64          __nds__sub64

  #define __RV_RADD8          __nds__radd8
  #define __RV_RSUB8          __nds__rsub8
  #define __RV_RADD16         __nds__radd16
  #define __RV_RSUB16         __nds__rsub16
  #define __RV_RADD64         __nds__radd64
  #define __RV_RSUB64         __nds__rsub64
  #define __RV_RADDW          __nds__raddw
  #define __RV_RSUBW          __nds__rsubw

  #define __RV_URADD8         __nds__uradd8
  #define __RV_URSUB8         __nds__ursub8
  #define __RV_URADD16        __nds__uradd16
  #define __RV_URSUB16        __nds__ursub16
  #define __RV_URADD64        __nds__uradd64
  #define __RV_URSUB64        __nds__ursub64
  #define __RV_URADDW         __nds__uraddw
  #define __RV_URSUBW         __nds__ursubw

  #define __RV_KADD8          __nds__kadd8
  #define __RV_KSUB8          __nds__ksub8
  #define __RV_KADD16         __nds__kadd16
  #define __RV_KSUB16         __nds__ksub16
  #define __RV_KADD64         __nds__kadd64
  #define __RV_KSUB64         __nds__ksub64
  #define __RV_KADDH          __nds__kaddh
  #define __RV_KSUBH          __nds__ksubh
  #define __RV_KADDW          __nds__kaddw
  #define __RV_KSUBW          __nds__ksubw

  #define __RV_UKADD8         __nds__ukadd8
  #define __RV_UKSUB8         __nds__uksub8
  #define __RV_UKADD16        __nds__ukadd16
  #define __RV_UKSUB16        __nds__uksub16
  #define __RV_UKADD64        __nds__ukadd64
  #define __RV_UKSUB64        __nds__uksub64
  #define __RV_UKADDH         __nds__ukaddh
  #define __RV_UKSUBH         __nds__uksubh
  #define __RV_UKADDW         __nds__ukaddw
  #define __RV_UKSUBW         __nds__uksubw

  #define __RV_CRAS16         __nds__cras16
  #define __RV_CRSA16         __nds__crsa16
  #define __RV_RCRAS16        __nds__rcras16
  #define __RV_RCRSA16        __nds__rcrsa16
  #define __RV_URCRAS16       __nds__urcras16
  #define __RV_URCRSA16       __nds__urcrsa16
  #define __RV_KCRAS16        __nds__kcras16
  #define __RV_KCRSA16        __nds__kcrsa16
  #define __RV_UKCRAS16       __nds__ukcras16
  #define __RV_UKCRSA16       __nds__ukcrsa16

  #define __RV_SRA8           __nds__sra8
  #define __RV_SRAI8          __nds__sra8
  #define __RV_SRA16          __nds__sra16
  #define __RV_SRAI16         __nds__sra16
  #define __RV_SRL8           __nds__srl8
  #define __RV_SRL16          __nds__srl16
  #define __RV_SLL8           __nds__sll8
  #define __RV_SLL16          __nds__sll16
  #define __RV_SRA_U          __nds__sra_u
  #define __RV_SRA8_U         __nds__sra8_u
  #define __RV_SRA16_U        __nds__sra16_u
  #define __RV_SRL8_U         __nds__srl8_u
  #define __RV_SRL16_U        __nds__srl16_u

  #define __RV_KSLL8          __nds__ksll8
  #define __RV_KSLL16         __nds__ksll16
  #define __RV_KSLLW          __nds__ksllw
  #define __RV_KSLRA8         __nds__kslra8
  #define __RV_KSLRA8_U       __nds__kslra8_u
  #define __RV_KSLRA16        __nds__kslra16
  #define __RV_KSLRA16_U      __nds__kslra16_u
  #define __RV_KSLRAW         __nds__kslraw
  #define __RV_KSLRAW_U       __nds__kslraw_u

  #define __RV_CMPEQ8         __nds__cmpeq8
  #define __RV_CMPEQ16        __nds__cmpeq16
  #define __RV_SCMPLE8        __nds__scmple8
  #define __RV_SCMPLE16       __nds__scmple16
  #define __RV_SCMPLT8        __nds__scmplt8
  #define __RV_SCMPLT16       __nds__scmplt16
  #define __RV_UCMPLE8        __nds__ucmple8
  #define __RV_UCMPLE16       __nds__ucmple16
  #define __RV_UCMPLT8        __nds__ucmplt8
  #define __RV_UCMPLT16       __nds__ucmplt16

  #define __RV_SMUL8          __nds__smul8
  #define __RV_UMUL8          __nds__umul8
  #define __RV_SMUL16         __nds__smul16
  #define __RV_UMUL16         __nds__umul16
  #define __RV_SMULX8         __nds__smulx8
  #define __RV_UMULX8         __nds__umulx8
  #define __RV_SMULX16        __nds__smulx16
  #define __RV_UMULX16        __nds__umulx16
  #define __RV_KHM8           __nds__khm8
  #define __RV_KHMX8          __nds__khmx8
  #define __RV_KHM16          __nds__khm16
  #define __RV_KHMX16         __nds__khmx16
  #define __RV_MULR64         __nds__mulr64
  #define __RV_MULSR64        __nds__mulsr64
  #define __RV_SMMUL          __nds__smmul
  #define __RV_SMMUL_U        __nds__smmul_u
  #define __RV_WEXT           __nds__wext

  #define __RV_SUNPKD810      __nds__sunpkd810
  #define __RV_SUNPKD820      __nds__sunpkd820
  #define __RV_SUNPKD830      __nds__sunpkd830
  #define __RV_SUNPKD831      __nds__sunpkd831
  #define __RV_SUNPKD832      __nds__sunpkd832
  #define __RV_ZUNPKD810      __nds__zunpkd810
  #define __RV_ZUNPKD820      __nds__zunpkd820
  #define __RV_ZUNPKD830      __nds__zunpkd830
  #define __RV_ZUNPKD831      __nds__zunpkd831
  #define __RV_ZUNPKD832      __nds__zunpkd832

  #define __RV_PKBB16         __nds__pkbb16
  #define __RV_PKBT16         __nds__pkbt16
  #define __RV_PKTT16         __nds__pktt16
  #define __RV_PKTB16         __nds__pktb16

  #define __RV_KMMAC          __nds__kmmac
  #define __RV_KMMAC_U        __nds__kmmac_u
  #define __RV_KMMSB          __nds__kmmsb
  #define __RV_KMMSB_U        __nds__kmmsb_u
  #define __RV_KWMMUL         __nds__kwmmul
  #define __RV_KWMMUL_U       __nds__kwmmul_u
  #define __RV_SMMWB          __nds__smmwb
  #define __RV_SMMWB_U        __nds__smmwb_u
  #define __RV_SMMWT          __nds__smmwt
  #define __RV_SMMWT_U        __nds__smmwt_u
  #define __RV_KMMAWB         __nds__kmmawb
  #define __RV_KMMAWB_U       __nds__kmmawb_u
  #define __RV_KMMAWT         __nds__kmmawt
  #define __RV_KMMAWT_U       __nds__kmmawt_u
  #define __RV_KMMWB2         __nds__kmmwb2
  #define __RV_KMMWB2_U       __nds__kmmwb2_u
  #define __RV_KMMWT2         __nds__kmmwt2
  #define __RV_KMMWT2_U       __nds__kmmwt2_u
  #define __RV_KMMAWB2        __nds__kmmawb2
  #define __RV_KMMAWB2_U      __nds__kmmawb2_u
  #define __RV_KMMAWT2        __nds__kmmawt2
  #define __RV_KMMAWT2_U      __nds__kmmawt2_u

  #define __RV_SMBB16         __nds__smbb16
  #define __RV_SMBT16         __nds__smbt16
  #define __RV_SMTT16         __nds__smtt16
  #define __RV_KMDA           __nds__kmda
  #define __RV_KMXDA          __nds__kmxda
  #define __RV_SMDS           __nds__smds
  #define __RV_SMDRS          __nds__smdrs
  #define __RV_SMXDS          __nds__smxds
  #define __RV_KMABB          __nds__kmabb
  #define __RV_KMABT          __nds__kmabt
  #define __RV_KMATT          __nds__kmatt
  #define __RV_KMADA          __nds__kmada
  #define __RV_KMAXDA         __nds__kmaxda
  #define __RV_KMADS          __nds__kmads
  #define __RV_KMADRS         __nds__kmadrs
  #define __RV_KMAXDS         __nds__kmaxds
  #define __RV_KMSDA          __nds__kmsda
  #define __RV_KMSXDA         __nds__kmsxda

  #define __RV_SMAL           __nds__smal
  #define __RV_SMAQA          __nds__smaqa
  #define __RV_UMAQA          __nds__umaqa
  #define __RV_SMAQA_SU       __nds__smaqa_su
  #define __RV_SMAR64         __nds__smar64
  #define __RV_SMSR64         __nds__smsr64
  #define __RV_UMAR64         __nds__umar64
  #define __RV_UMSR64         __nds__umsr64
  #define __RV_KMAR64         __nds__kmar64
  #define __RV_KMSR64         __nds__kmsr64
  #define __RV_UKMAR64        __nds__ukmar64
  #define __RV_UKMSR64        __nds__ukmsr64
  #define __RV_SMALBB         __nds__smalbb
  #define __RV_SMALBT         __nds__smalbt
  #define __RV_SMALTT         __nds__smaltt
  #define __RV_SMALDA         __nds__smalda
  #define __RV_SMALXDA        __nds__smalxda
  #define __RV_SMALDS         __nds__smalds
  #define __RV_SMALDRS        __nds__smaldrs
  #define __RV_SMALXDS        __nds__smalxds
  #define __RV_SMSLDA         __nds__smslda
  #define __RV_SMSLXDA        __nds__smslxda

  #define __RV_MINW           __nds__minw
  #define __RV_MAXW           __nds__maxw
  #define __RV_SMIN8          __nds__smin8
  #define __RV_SMAX8          __nds__smax8
  #define __RV_SMIN16         __nds__smin16
  #define __RV_SMAX16         __nds__smax16
  #define __RV_UMIN8          __nds__umin8
  #define __RV_UMAX8          __nds__umax8
  #define __RV_UMIN16         __nds__umin16
  #define __RV_UMAX16         __nds__umax16

  #define __RV_KABS8          __nds__kabs8
  #define __RV_KABS16         __nds__kabs16
  #define __RV_KABSW          __nds__kabsw
  #define __RV_SCLIP8         __nds__sclip8
  #define __RV_SCLIP16        __nds__sclip16
  #define __RV_SCLIP32        __nds__sclip32
  #define __RV_UCLIP8         __nds__uclip8
  #define __RV_UCLIP16        __nds__uclip16
  #define __RV_UCLIP32        __nds__uclip32

  #define __RV_CLO8           __nds__clo8
  #define __RV_CLO16          __nds__clo16
  #define __RV_CLO32          __nds__clo32
  #define __RV_CLZ8           __nds__clz8
  #define __RV_CLZ16          __nds__clz16
  #define __RV_CLZ32          __nds__clz32
  #define __RV_CLRS8          __nds__clrs8
  #define __RV_CLRS16         __nds__clrs16
  #define __RV_CLRS32         __nds__clrs32
  #define __RV_SWAP8          __nds__swap8
  #define __RV_SWAP16         __nds__swap16

  #define __RV_KHMBB          __nds__khmbb
  #define __RV_KHMBT          __nds__khmbt
  #define __RV_KHMTT          __nds__khmtt
  #define __RV_KDMBB          __nds__kdmbb
  #define __RV_KDMBT          __nds__kdmbt
  #define __RV_KDMTT          __nds__kdmtt
  #define __RV_KDMABB         __nds__kdmabb
  #define __RV_KDMABT         __nds__kdmabt
  #define __RV_KDMATT         __nds__kdmatt

  #define __RV_MADDR32        __nds__maddr32
  #define __RV_MSUBR32        __nds__msubr32
  #define __RV_PBSAD          __nds__pbsad
  #define __RV_PBSADA         __nds__pbsada
  #define __RV_AVE            __nds__ave
  #define __RV_BITREV         __nds__bitrev
  #define __RV_INSB           __nds__insb
  23948. #if (__riscv_xlen == 64)
  /*
   * Additional aliases available only when __riscv_xlen == 64 (see the enclosing #if).
   * As above, __RV_<MNEMONIC> expands to the IAR-supplied __nds__<mnemonic>; the
   * immediate-form names __RV_SRAI32 and __RV_SLLI32 expand to the same intrinsics as
   * __RV_SRA32 and __RV_SLL32.
   */
  #define __RV_ADD32          __nds__add32
  #define __RV_SUB32          __nds__sub32
  #define __RV_RADD32         __nds__radd32
  #define __RV_RSUB32         __nds__rsub32
  #define __RV_URADD32        __nds__uradd32
  #define __RV_URSUB32        __nds__ursub32
  #define __RV_KADD32         __nds__kadd32
  #define __RV_KSUB32         __nds__ksub32
  #define __RV_UKADD32        __nds__ukadd32
  #define __RV_UKSUB32        __nds__uksub32

  #define __RV_CRAS32         __nds__cras32
  #define __RV_CRSA32         __nds__crsa32
  #define __RV_RCRAS32        __nds__rcras32
  #define __RV_RCRSA32        __nds__rcrsa32
  #define __RV_URCRAS32       __nds__urcras32
  #define __RV_URCRSA32       __nds__urcrsa32
  #define __RV_KCRAS32        __nds__kcras32
  #define __RV_KCRSA32        __nds__kcrsa32
  #define __RV_UKCRAS32       __nds__ukcras32
  #define __RV_UKCRSA32       __nds__ukcrsa32

  #define __RV_SRA32          __nds__sra32
  #define __RV_SRAI32         __nds__sra32
  #define __RV_SRL32          __nds__srl32
  #define __RV_SLL32          __nds__sll32
  #define __RV_SLLI32         __nds__sll32
  #define __RV_SRAW_U         __nds__sraw_u
  #define __RV_SRA32_U        __nds__sra32_u
  #define __RV_SRL32_U        __nds__srl32_u
  #define __RV_KSLL32         __nds__ksll32
  #define __RV_KSLRA32        __nds__kslra32
  #define __RV_KSLRA32_U      __nds__kslra32_u

  #define __RV_SMBB32         __nds__smbb32
  #define __RV_SMBT32         __nds__smbt32
  #define __RV_SMTT32         __nds__smtt32
  #define __RV_PKBB32         __nds__pkbb32
  #define __RV_PKBT32         __nds__pkbt32
  #define __RV_PKTT32         __nds__pktt32
  #define __RV_PKTB32         __nds__pktb32

  #define __RV_SMIN32         __nds__smin32
  #define __RV_SMAX32         __nds__smax32
  #define __RV_UMIN32         __nds__umin32
  #define __RV_UMAX32         __nds__umax32
  #define __RV_KABS32         __nds__kabs32

  #define __RV_KHMBB16        __nds__khmbb16
  #define __RV_KHMBT16        __nds__khmbt16
  #define __RV_KHMTT16        __nds__khmtt16
  #define __RV_KDMBB16        __nds__kdmbb16
  #define __RV_KDMBT16        __nds__kdmbt16
  #define __RV_KDMTT16        __nds__kdmtt16
  #define __RV_KDMABB16       __nds__kdmabb16
  #define __RV_KDMABT16       __nds__kdmabt16
  #define __RV_KDMATT16       __nds__kdmatt16

  #define __RV_KMABB32        __nds__kmabb32
  #define __RV_KMABT32        __nds__kmabt32
  #define __RV_KMATT32        __nds__kmatt32
  #define __RV_KMDA32         __nds__kmda32
  #define __RV_KMXDA32        __nds__kmxda32
  #define __RV_KMADA32        __nds__kmada32
  #define __RV_KMAXDA32       __nds__kmaxda32
  #define __RV_KMADS32        __nds__kmads32
  #define __RV_KMADRS32       __nds__kmadrs32
  #define __RV_KMAXDS32       __nds__kmaxds32
  #define __RV_KMSDA32        __nds__kmsda32
  #define __RV_KMSXDA32       __nds__kmsxda32
  #define __RV_SMDS32         __nds__smds32
  #define __RV_SMDRS32        __nds__smdrs32
  #define __RV_SMXDS32        __nds__smxds32
  24016. #endif /* __riscv_xlen == 64 */
  // For now, the IAR IDE supports version 0.5.0 of the P-extension, whereas Nuclei supports 0.5.4,
  // so Nuclei supplies a workaround that adds the custom instructions not natively
  // supported by the IAR Assembler. Note that __RV_BPICK remains to be implemented in the future.
  // Only the Nuclei Xxldsp custom instruction set is implemented here: bpick is not implemented,
  // and expdxx is implemented in C rather than via the .insn variant.
  /*
   * IAR workaround wrapper for STAS16: the instruction word is emitted with a raw
   * `.insn r` directive (numeric fields 0x7F, 0x2, 0x7A). Operand %0 is the returned
   * value; %1 and %2 are `a` and `b`.
   */
  #pragma inline=forced_no_body
  unsigned long __RV_STAS16(unsigned long a, unsigned long b)
  {
      unsigned long rd;

      __asm(".insn r 0x7F, 0x2, 0x7A, %0,%1,%2"
            : "=r"(rd)
            : "r"(a), "r"(b));
      return rd;
  }
  /*
   * IAR workaround wrapper for RSTAS16: the instruction word is emitted with a raw
   * `.insn r` directive (numeric fields 0x7F, 0x2, 0x5A). Operand %0 is the returned
   * value; %1 and %2 are `a` and `b`.
   */
  #pragma inline=forced_no_body
  unsigned long __RV_RSTAS16(unsigned long a, unsigned long b)
  {
      unsigned long rd;

      __asm(".insn r 0x7F, 0x2, 0x5A, %0,%1,%2"
            : "=r"(rd)
            : "r"(a), "r"(b));
      return rd;
  }
  /*
   * IAR workaround wrapper for KSTAS16: the instruction word is emitted with a raw
   * `.insn r` directive (numeric fields 0x7F, 0x2, 0x62). Operand %0 is the returned
   * value; %1 and %2 are `a` and `b`.
   */
  #pragma inline=forced_no_body
  unsigned long __RV_KSTAS16(unsigned long a, unsigned long b)
  {
      unsigned long rd;

      __asm(".insn r 0x7F, 0x2, 0x62, %0,%1,%2"
            : "=r"(rd)
            : "r"(a), "r"(b));
      return rd;
  }
  /*
   * IAR workaround wrapper for URSTAS16: the instruction word is emitted with a raw
   * `.insn r` directive (numeric fields 0x7F, 0x2, 0x6A). Operand %0 is the returned
   * value; %1 and %2 are `a` and `b`.
   */
  #pragma inline=forced_no_body
  unsigned long __RV_URSTAS16(unsigned long a, unsigned long b)
  {
      unsigned long rd;

      __asm(".insn r 0x7F, 0x2, 0x6A, %0,%1,%2"
            : "=r"(rd)
            : "r"(a), "r"(b));
      return rd;
  }
  /*
   * IAR workaround wrapper for UKSTAS16: the instruction word is emitted with a raw
   * `.insn r` directive (numeric fields 0x7F, 0x2, 0x72). Operand %0 is the returned
   * value; %1 and %2 are `a` and `b`.
   */
  #pragma inline=forced_no_body
  unsigned long __RV_UKSTAS16(unsigned long a, unsigned long b)
  {
      unsigned long rd;

      __asm(".insn r 0x7F, 0x2, 0x72, %0,%1,%2"
            : "=r"(rd)
            : "r"(a), "r"(b));
      return rd;
  }
  /*
   * IAR workaround wrapper for STSA16: the instruction word is emitted with a raw
   * `.insn r` directive (numeric fields 0x7F, 0x2, 0x7B). Operand %0 is the returned
   * value; %1 and %2 are `a` and `b`.
   */
  #pragma inline=forced_no_body
  unsigned long __RV_STSA16(unsigned long a, unsigned long b)
  {
      unsigned long rd;

      __asm(".insn r 0x7F, 0x2, 0x7B, %0,%1,%2"
            : "=r"(rd)
            : "r"(a), "r"(b));
      return rd;
  }
  /*
   * IAR workaround wrapper for RSTSA16: the instruction word is emitted with a raw
   * `.insn r` directive (numeric fields 0x7F, 0x2, 0x5B). Operand %0 is the returned
   * value; %1 and %2 are `a` and `b`.
   */
  #pragma inline=forced_no_body
  unsigned long __RV_RSTSA16(unsigned long a, unsigned long b)
  {
      unsigned long rd;

      __asm(".insn r 0x7F, 0x2, 0x5B, %0,%1,%2"
            : "=r"(rd)
            : "r"(a), "r"(b));
      return rd;
  }
  /*
   * IAR workaround wrapper for KSTSA16: the instruction word is emitted with a raw
   * `.insn r` directive (numeric fields 0x7F, 0x2, 0x63). Operand %0 is the returned
   * value; %1 and %2 are `a` and `b`.
   */
  #pragma inline=forced_no_body
  unsigned long __RV_KSTSA16(unsigned long a, unsigned long b)
  {
      unsigned long rd;

      __asm(".insn r 0x7F, 0x2, 0x63, %0,%1,%2"
            : "=r"(rd)
            : "r"(a), "r"(b));
      return rd;
  }
  /*
   * IAR workaround wrapper for URSTSA16: the instruction word is emitted with a raw
   * `.insn r` directive (numeric fields 0x7F, 0x2, 0x6B). Operand %0 is the returned
   * value; %1 and %2 are `a` and `b`.
   */
  #pragma inline=forced_no_body
  unsigned long __RV_URSTSA16(unsigned long a, unsigned long b)
  {
      unsigned long rd;

      __asm(".insn r 0x7F, 0x2, 0x6B, %0,%1,%2"
            : "=r"(rd)
            : "r"(a), "r"(b));
      return rd;
  }
  /*
   * IAR workaround wrapper for UKSTSA16: the instruction word is emitted with a raw
   * `.insn r` directive (numeric fields 0x7F, 0x2, 0x73). Operand %0 is the returned
   * value; %1 and %2 are `a` and `b`.
   */
  #pragma inline=forced_no_body
  unsigned long __RV_UKSTSA16(unsigned long a, unsigned long b)
  {
      unsigned long rd;

      __asm(".insn r 0x7F, 0x2, 0x73, %0,%1,%2"
            : "=r"(rd)
            : "r"(a), "r"(b));
      return rd;
  }
  24082. // #pragma inline=forced_no_body
  24083. // unsigned long __RV_BPICK(unsigned long a, unsigned long b, unsigned long c) {
  24084. // TODO: remains to be done
  24085. // }
  24086. // RV64 only
  /*
   * IAR workaround wrapper for STAS32: the instruction word is emitted with a raw
   * `.insn r` directive (numeric fields 0x7F, 0x2, 0x78). Operand %0 is the returned
   * value; %1 and %2 are `a` and `b`.
   * NOTE(review): this group is labelled "RV64 only", yet no XLEN guard is visible
   * around the definition - confirm whether one is intended.
   */
  #pragma inline=forced_no_body
  unsigned long __RV_STAS32(unsigned long a, unsigned long b)
  {
      unsigned long rd;

      __asm(".insn r 0x7F, 0x2, 0x78, %0,%1,%2"
            : "=r"(rd)
            : "r"(a), "r"(b));
      return rd;
  }
  /*
   * IAR workaround wrapper for RSTAS32: the instruction word is emitted with a raw
   * `.insn r` directive (numeric fields 0x7F, 0x2, 0x58). Operand %0 is the returned
   * value; %1 and %2 are `a` and `b`.
   */
  #pragma inline=forced_no_body
  unsigned long __RV_RSTAS32(unsigned long a, unsigned long b)
  {
      unsigned long rd;

      __asm(".insn r 0x7F, 0x2, 0x58, %0,%1,%2"
            : "=r"(rd)
            : "r"(a), "r"(b));
      return rd;
  }
  /*
   * IAR workaround wrapper for KSTAS32: the instruction word is emitted with a raw
   * `.insn r` directive (numeric fields 0x7F, 0x2, 0x60). Operand %0 is the returned
   * value; %1 and %2 are `a` and `b`.
   */
  #pragma inline=forced_no_body
  unsigned long __RV_KSTAS32(unsigned long a, unsigned long b)
  {
      unsigned long rd;

      __asm(".insn r 0x7F, 0x2, 0x60, %0,%1,%2"
            : "=r"(rd)
            : "r"(a), "r"(b));
      return rd;
  }
  /*
   * IAR workaround wrapper for URSTAS32: the instruction word is emitted with a raw
   * `.insn r` directive (numeric fields 0x7F, 0x2, 0x68). Operand %0 is the returned
   * value; %1 and %2 are `a` and `b`.
   */
  #pragma inline=forced_no_body
  unsigned long __RV_URSTAS32(unsigned long a, unsigned long b)
  {
      unsigned long rd;

      __asm(".insn r 0x7F, 0x2, 0x68, %0,%1,%2"
            : "=r"(rd)
            : "r"(a), "r"(b));
      return rd;
  }
  /*
   * IAR workaround wrapper for UKSTAS32: the instruction word is emitted with a raw
   * `.insn r` directive (numeric fields 0x7F, 0x2, 0x70). Operand %0 is the returned
   * value; %1 and %2 are `a` and `b`.
   */
  #pragma inline=forced_no_body
  unsigned long __RV_UKSTAS32(unsigned long a, unsigned long b)
  {
      unsigned long rd;

      __asm(".insn r 0x7F, 0x2, 0x70, %0,%1,%2"
            : "=r"(rd)
            : "r"(a), "r"(b));
      return rd;
  }
  /*
   * IAR workaround wrapper for STSA32: the instruction word is emitted with a raw
   * `.insn r` directive (numeric fields 0x7F, 0x2, 0x79). Operand %0 is the returned
   * value; %1 and %2 are `a` and `b`.
   */
  #pragma inline=forced_no_body
  unsigned long __RV_STSA32(unsigned long a, unsigned long b)
  {
      unsigned long rd;

      __asm(".insn r 0x7F, 0x2, 0x79, %0,%1,%2"
            : "=r"(rd)
            : "r"(a), "r"(b));
      return rd;
  }
  /*
   * IAR workaround wrapper for RSTSA32: the instruction word is emitted with a raw
   * `.insn r` directive (numeric fields 0x7F, 0x2, 0x59). Operand %0 is the returned
   * value; %1 and %2 are `a` and `b`.
   */
  #pragma inline=forced_no_body
  unsigned long __RV_RSTSA32(unsigned long a, unsigned long b)
  {
      unsigned long rd;

      __asm(".insn r 0x7F, 0x2, 0x59, %0,%1,%2"
            : "=r"(rd)
            : "r"(a), "r"(b));
      return rd;
  }
  /*
   * IAR workaround wrapper for KSTSA32: the instruction word is emitted with a raw
   * `.insn r` directive (numeric fields 0x7F, 0x2, 0x61). Operand %0 is the returned
   * value; %1 and %2 are `a` and `b`.
   */
  #pragma inline=forced_no_body
  unsigned long __RV_KSTSA32(unsigned long a, unsigned long b)
  {
      unsigned long rd;

      __asm(".insn r 0x7F, 0x2, 0x61, %0,%1,%2"
            : "=r"(rd)
            : "r"(a), "r"(b));
      return rd;
  }
  /*
   * IAR workaround wrapper for URSTSA32: the instruction word is emitted with a raw
   * `.insn r` directive (numeric fields 0x7F, 0x2, 0x69). Operand %0 is the returned
   * value; %1 and %2 are `a` and `b`.
   */
  #pragma inline=forced_no_body
  unsigned long __RV_URSTSA32(unsigned long a, unsigned long b)
  {
      unsigned long rd;

      __asm(".insn r 0x7F, 0x2, 0x69, %0,%1,%2"
            : "=r"(rd)
            : "r"(a), "r"(b));
      return rd;
  }
  /*
   * IAR workaround wrapper for UKSTSA32: the instruction word is emitted with a raw
   * `.insn r` directive (numeric fields 0x7F, 0x2, 0x71). Operand %0 is the returned
   * value; %1 and %2 are `a` and `b`.
   */
  #pragma inline=forced_no_body
  unsigned long __RV_UKSTSA32(unsigned long a, unsigned long b)
  {
      unsigned long rd;

      __asm(".insn r 0x7F, 0x2, 0x71, %0,%1,%2"
            : "=r"(rd)
            : "r"(a), "r"(b));
      return rd;
  }
  /* C implementation of EXPD80 for IAR: passes bits [7:0] of `a` to __EXPD_BYTE and returns its result. */
  #pragma inline=forced_no_body
  unsigned long __RV_EXPD80(unsigned long a)
  {
      const uint8_t selected = (uint8_t)(a & 0xff);

      return __EXPD_BYTE(selected);
  }
  /* C implementation of EXPD81 for IAR: passes bits [15:8] of `a` to __EXPD_BYTE and returns its result. */
  #pragma inline=forced_no_body
  unsigned long __RV_EXPD81(unsigned long a)
  {
      const uint8_t selected = (uint8_t)((a >> 8) & 0xff);

      return __EXPD_BYTE(selected);
  }
  /* C implementation of EXPD82 for IAR: passes bits [23:16] of `a` to __EXPD_BYTE and returns its result. */
  #pragma inline=forced_no_body
  unsigned long __RV_EXPD82(unsigned long a)
  {
      const uint8_t selected = (uint8_t)((a >> 16) & 0xff);

      return __EXPD_BYTE(selected);
  }
  /* C implementation of EXPD83 for IAR: passes bits [31:24] of `a` to __EXPD_BYTE and returns its result. */
  #pragma inline=forced_no_body
  unsigned long __RV_EXPD83(unsigned long a)
  {
      const uint8_t selected = (uint8_t)((a >> 24) & 0xff);

      return __EXPD_BYTE(selected);
  }
  24167. #if __RISCV_XLEN == 64
  24168. // RV64 only
  /* C implementation of EXPD84 for IAR (64-bit XLEN only): passes bits [39:32] of `a` to __EXPD_BYTE. */
  #pragma inline=forced_no_body
  unsigned long __RV_EXPD84(unsigned long a)
  {
      const uint8_t selected = (uint8_t)((a >> 32) & 0xff);

      return __EXPD_BYTE(selected);
  }
  /* C implementation of EXPD85 for IAR (64-bit XLEN only): passes bits [47:40] of `a` to __EXPD_BYTE. */
  #pragma inline=forced_no_body
  unsigned long __RV_EXPD85(unsigned long a)
  {
      const uint8_t selected = (uint8_t)((a >> 40) & 0xff);

      return __EXPD_BYTE(selected);
  }
  /* C implementation of EXPD86 for IAR (64-bit XLEN only): passes bits [55:48] of `a` to __EXPD_BYTE. */
  #pragma inline=forced_no_body
  unsigned long __RV_EXPD86(unsigned long a)
  {
      const uint8_t selected = (uint8_t)((a >> 48) & 0xff);

      return __EXPD_BYTE(selected);
  }
  /* C implementation of EXPD87 for IAR (64-bit XLEN only): passes bits [63:56] of `a` to __EXPD_BYTE. */
  #pragma inline=forced_no_body
  unsigned long __RV_EXPD87(unsigned long a)
  {
      const uint8_t selected = (uint8_t)((a >> 56) & 0xff);

      return __EXPD_BYTE(selected);
  }
  24189. #endif
  24190. #pragma language=restore
  24191. #else
  24192. #error Unknown compiler
  24193. #endif /* __ICCRISCV__ */
  /* XXXXX ARM Compatible SIMD API XXXXX */
  /** \brief Quad 8-bit saturating addition (Q setting); forwards to __RV_KADD8. */
  #define __QADD8(op1, op2)     __RV_KADD8((op1), (op2))
  /** \brief Quad 8-bit saturating subtraction (Q setting); forwards to __RV_KSUB8. */
  #define __QSUB8(op1, op2)     __RV_KSUB8((op1), (op2))
  /** \brief Dual 16-bit saturating addition (Q setting); forwards to __RV_KADD16. */
  #define __QADD16(op1, op2)    __RV_KADD16((op1), (op2))
  /** \brief Dual 16-bit signed addition with halved results; forwards to __RV_RADD16. */
  #define __SHADD16(op1, op2)   __RV_RADD16((op1), (op2))
  /** \brief Dual 16-bit saturating subtraction (Q setting); forwards to __RV_KSUB16. */
  #define __QSUB16(op1, op2)    __RV_KSUB16((op1), (op2))
  /** \brief Dual 16-bit signed subtraction with halved results; forwards to __RV_RSUB16. */
  #define __SHSUB16(op1, op2)   __RV_RSUB16((op1), (op2))
  /** \brief Dual 16-bit add and subtract with exchange (Q setting); forwards to __RV_KCRAS16. */
  #define __QASX(op1, op2)      __RV_KCRAS16((op1), (op2))
  /** \brief Dual 16-bit signed addition and subtraction with halved results; forwards to __RV_RCRAS16. */
  #define __SHASX(op1, op2)     __RV_RCRAS16((op1), (op2))
  /** \brief Dual 16-bit subtract and add with exchange (Q setting); forwards to __RV_KCRSA16. */
  #define __QSAX(op1, op2)      __RV_KCRSA16((op1), (op2))
  /** \brief Dual 16-bit signed subtraction and addition with halved results; forwards to __RV_RCRSA16. */
  #define __SHSAX(op1, op2)     __RV_RCRSA16((op1), (op2))
  /** \brief Dual 16-bit signed multiply with exchange returning difference.
   *  Forwards to __RV_SMXDS with the two operands deliberately passed in swapped order. */
  #define __SMUSDX(op1, op2)    __RV_SMXDS((op2), (op1))
  24217. /** \brief Q setting sum of dual 16-bit signed multiply with exchange. */
  24218. __STATIC_FORCEINLINE long __SMUADX (unsigned long op1, unsigned long op2)
  24219. {
  24220. return __RV_KMXDA(op1, op2);
  24221. }
  /** \brief Saturating add (Q setting); forwards to __RV_KADDW. */
  #define __QADD(op1, op2)      __RV_KADDW((op1), (op2))
  /** \brief Saturating subtract (Q setting); forwards to __RV_KSUBW. */
  #define __QSUB(op1, op2)      __RV_KSUBW((op1), (op2))
  24226. /** \brief Q setting dual 16-bit signed multiply with single 32-bit accumulator. */
  24227. __STATIC_FORCEINLINE long __SMLAD(unsigned long op1, unsigned long op2, long acc)
  24228. {
  24229. return __RV_KMADA(acc, op1, op2);
  24230. }
  24231. /** \brief Q setting pre-exchanged dual 16-bit signed multiply with single 32-bit accumulator. */
  24232. __STATIC_FORCEINLINE long __SMLADX(unsigned long op1, unsigned long op2, long acc)
  24233. {
  24234. return __RV_KMAXDA(acc, op1, op2);
  24235. }
  24236. /** \brief Q setting dual 16-bit signed multiply with exchange subtract with 32-bit accumulate. */
  24237. __STATIC_FORCEINLINE long __SMLSDX(unsigned long op1, unsigned long op2, long acc)
  24238. {
  24239. return (acc - __RV_SMXDS(op1, op2));
  24240. }
  24241. /** \brief Dual 16-bit signed multiply with single 64-bit accumulator. */
  24242. __STATIC_FORCEINLINE long long __SMLALD(unsigned long op1, unsigned long op2, long long acc)
  24243. {
  24244. return __RV_SMALDA(acc, op1, op2);
  24245. }
  24246. /** \brief Dual 16-bit signed multiply with exchange with single 64-bit accumulator. */
  24247. __STATIC_FORCEINLINE long long __SMLALDX(unsigned long op1, unsigned long op2, long long acc)
  24248. {
  24249. return __RV_SMALXDA(acc, op1, op2);
  24250. }
  24251. /** \brief Q setting sum of dual 16-bit signed multiply. */
  24252. __STATIC_FORCEINLINE long __SMUAD(unsigned long op1, unsigned long op2)
  24253. {
  24254. return __RV_KMDA(op1, op2);
  24255. }
  24256. /** \brief Dual 16-bit signed multiply returning difference. */
  24257. __STATIC_FORCEINLINE long __SMUSD(unsigned long op1, unsigned long op2)
  24258. {
  24259. return __RV_SMDRS(op1, op2);
  24260. }
  /** \brief Dual extract 8-bits and sign extend each to 16-bits; forwards to __RV_SUNPKD820. */
  #define __SXTB16(op)          __RV_SUNPKD820(op)
  24263. /** \brief Dual extracted 8-bit to 16-bit signed addition. TODO Need test */
  24264. __STATIC_FORCEINLINE unsigned long __SXTAB16(unsigned long op1, unsigned long op2)
  24265. {
  24266. return __RV_ADD16(op1, __RV_SUNPKD820(op2));
  24267. }
  /** \brief __SXTAB16 applied to OP1 and to OP2 after OP2 has been passed through __ROR(OP2, SHIFT). */
  #define __SXTAB16_RORn(OP1, OP2, SHIFT)   __SXTAB16(OP1, __ROR(OP2, SHIFT))
  24269. /** \brief 32-bit signed multiply with 32-bit truncated accumulator. */
  24270. __STATIC_FORCEINLINE long __SMMLA(long op1, long op2, long acc)
  24271. {
  24272. long mul;
  24273. mul = __RV_SMMUL(op1, op2);
  24274. return (acc + mul);
  24275. }
  /*
   * Short-name aliases: each name on the left expands to the __RV_* intrinsic on the right.
   * Most entries only drop the "RV_" part of the name; the exceptions are
   * __DQADD8/__DQSUB8 (-> DKADD8/DKSUB8), __SADD16 (-> ADD16), __SSUB8/__SADD8
   * (-> KSUB8/KADD8) and __USAT16 (-> UCLIP16).
   * NOTE(review): __SADD8/__SSUB8 resolve to the K-prefixed intrinsics while __SADD16
   * resolves to the plain __RV_ADD16 - confirm that this asymmetry is intended.
   */
  #define __DKHM8             __RV_DKHM8
  #define __DKHM16            __RV_DKHM16
  #define __DKSUB16           __RV_DKSUB16
  #define __SMAQA             __RV_SMAQA
  #define __MULSR64           __RV_MULSR64
  #define __DQADD8            __RV_DKADD8
  #define __DQSUB8            __RV_DKSUB8
  #define __DKADD16           __RV_DKADD16
  #define __PKBB16            __RV_PKBB16
  #define __DKSLRA16          __RV_DKSLRA16
  #define __DKSLRA8           __RV_DKSLRA8
  #define __KABSW             __RV_KABSW
  #define __DKABS8            __RV_DKABS8
  #define __DKABS16           __RV_DKABS16
  #define __SMALDA            __RV_SMALDA
  #define __SMSLDA            __RV_SMSLDA
  #define __SMALBB            __RV_SMALBB
  #define __SUB64             __RV_SUB64
  #define __ADD64             __RV_ADD64
  #define __SMBB16            __RV_SMBB16
  #define __SMBT16            __RV_SMBT16
  #define __SMTT16            __RV_SMTT16
  #define __EXPD80            __RV_EXPD80
  #define __SMAX8             __RV_SMAX8
  #define __SMAX16            __RV_SMAX16
  #define __PKTT16            __RV_PKTT16
  #define __KADD16            __RV_KADD16
  #define __SADD16            __RV_ADD16
  #define __SSUB8             __RV_KSUB8
  #define __SADD8             __RV_KADD8
  #define __USAT16            __RV_UCLIP16
  #define __SMALTT            __RV_SMALTT
  /**
   * \brief Halfword packing instruction. Combines bits[15:0] of val1 with bits[31:16] of val2 left-shifted by val3.
   * \details A shift of 0 expands to __RV_PKTB16 and a shift of 16 to __RV_PKBB16;
   *          any other shift uses the generic mask-and-shift expression.
   * \param ARG1  val1, supplies bits[15:0] of the result
   * \param ARG2  val2, shifted left by ARG3 before bits[31:16] are taken
   * \param ARG3  val3, the shift amount
   * \note Every argument is fully parenthesized, so expression arguments are
   *       safe. ARG3 is evaluated more than once; avoid side effects in it.
   */
  #define __PKHBT(ARG1, ARG2, ARG3) \
      (((ARG3) == 0)  ? __RV_PKTB16((ARG2), (ARG1)) : \
       ((ARG3) == 16) ? __RV_PKBB16((ARG2), (ARG1)) : \
       ((((uint32_t)(ARG1)) & 0x0000FFFFUL) | \
        ((((uint32_t)(ARG2)) << (ARG3)) & 0xFFFF0000UL)))
  /**
   * \brief Halfword packing instruction. Combines bits[31:16] of val1 with bits[15:0] of val2 right-shifted by val3.
   * \details A shift of 0 expands to __RV_PKTB16 and a shift of 16 to __RV_PKTT16;
   *          any other shift uses the generic mask-and-shift expression.
   * \param ARG1  val1, supplies bits[31:16] of the result
   * \param ARG2  val2, shifted right by ARG3 before bits[15:0] are taken
   * \param ARG3  val3, the shift amount
   * \note Every argument is fully parenthesized, so expression arguments are
   *       safe. ARG3 is evaluated more than once; avoid side effects in it.
   */
  #define __PKHTB(ARG1, ARG2, ARG3) \
      (((ARG3) == 0)  ? __RV_PKTB16((ARG1), (ARG2)) : \
       ((ARG3) == 16) ? __RV_PKTT16((ARG1), (ARG2)) : \
       ((((uint32_t)(ARG1)) & 0xFFFF0000UL) | \
        ((((uint32_t)(ARG2)) >> (ARG3)) & 0x0000FFFFUL)))
  24318. #if __RISCV_XLEN == 64
  /**
   * \brief Halfword packing instruction on both 32-bit halves of a 64-bit value.
   * \details In the low word, bits[15:0] of val1 are combined with bits[31:16] of
   *          val2 left-shifted by val3; in the high word, bits[47:32] of val1 are
   *          combined with bits[63:48] of the high word of val2 shifted the same
   *          way. The two 32-bit results are then packed into one 64-bit value.
   *          A shift of 0 expands to __RV_PKTB16 and a shift of 16 to __RV_PKBB16.
   * \param ARG1  val1, supplies the low halfword of each 32-bit result
   * \param ARG2  val2, each 32-bit word is shifted left by ARG3 independently
   * \param ARG3  val3, the shift amount
   * \note Arguments are fully parenthesized and the generic path is computed in
   *       uint64_t so the `<< 32` never shifts into a sign bit. Arguments may be
   *       evaluated more than once; avoid side effects in them.
   */
  #define __PKHBT64(ARG1, ARG2, ARG3) \
      (((ARG3) == 0)  ? __RV_PKTB16((ARG2), (ARG1)) : \
       ((ARG3) == 16) ? __RV_PKBB16((ARG2), (ARG1)) : \
       (((uint64_t)((((uint32_t)(((uint64_t)(ARG1)) >> 32)) & 0x0000FFFFUL) | \
                    ((((uint32_t)(((uint64_t)(ARG2)) >> 32)) << (ARG3)) & 0xFFFF0000UL)) << 32) | \
        ((uint64_t)((((uint32_t)(ARG1)) & 0x0000FFFFUL) | \
                    ((((uint32_t)(ARG2)) << (ARG3)) & 0xFFFF0000UL)) & 0xFFFFFFFFUL)))
  /**
   * \brief Halfword packing instruction on both 32-bit halves of a 64-bit value.
   * \details In the low word, bits[31:16] of val1 are combined with bits[15:0] of
   *          val2 right-shifted by val3; in the high word, bits[63:48] of val1 are
   *          combined with bits[47:32] of the high word of val2 shifted the same
   *          way. The two 32-bit results are then packed into one 64-bit value.
   *          A shift of 0 expands to __RV_PKTB16 and a shift of 16 to __RV_PKTT16.
   * \param ARG1  val1, supplies the high halfword of each 32-bit result
   * \param ARG2  val2, each 32-bit word is shifted right by ARG3 independently
   * \param ARG3  val3, the shift amount
   * \note Arguments are fully parenthesized, so expression arguments are safe.
   *       They may be evaluated more than once; avoid side effects in them.
   */
  #define __PKHTB64(ARG1, ARG2, ARG3) \
      (((ARG3) == 0)  ? __RV_PKTB16((ARG1), (ARG2)) : \
       ((ARG3) == 16) ? __RV_PKTT16((ARG1), (ARG2)) : \
       (((uint64_t)((((uint32_t)(((uint64_t)(ARG1)) >> 32)) & 0xFFFF0000UL) | \
                    ((((uint32_t)(((uint64_t)(ARG2)) >> 32)) >> (ARG3)) & 0x0000FFFFUL)) << 32) | \
        ((uint64_t)((((uint32_t)(ARG1)) & 0xFFFF0000UL) | \
                    ((((uint32_t)(ARG2)) >> (ARG3)) & 0x0000FFFFUL)) & 0xFFFFFFFFUL)))
  24335. #else
  /**
   * \brief Halfword packing instruction on both 32-bit halves of a 64-bit value
   *        (variant used when __RISCV_XLEN is not 64).
   * \details In the low word, bits[15:0] of val1 are combined with bits[31:16] of
   *          val2 left-shifted by val3; in the high word, bits[47:32] of val1 are
   *          combined with bits[63:48] of the high word of val2 shifted the same
   *          way. The two 32-bit results are then packed into one 64-bit value.
   *          A shift of 0 expands to __RV_DPKTB16 and a shift of 16 to __RV_DPKBB16.
   * \param ARG1  val1, supplies the low halfword of each 32-bit result
   * \param ARG2  val2, each 32-bit word is shifted left by ARG3 independently
   * \param ARG3  val3, the shift amount
   * \note Arguments are fully parenthesized and the generic path is computed in
   *       uint64_t so the `<< 32` never shifts into a sign bit. Arguments may be
   *       evaluated more than once; avoid side effects in them.
   */
  #define __PKHBT64(ARG1, ARG2, ARG3) \
      (((ARG3) == 0)  ? __RV_DPKTB16((ARG2), (ARG1)) : \
       ((ARG3) == 16) ? __RV_DPKBB16((ARG2), (ARG1)) : \
       (((uint64_t)((((uint32_t)(((uint64_t)(ARG1)) >> 32)) & 0x0000FFFFUL) | \
                    ((((uint32_t)(((uint64_t)(ARG2)) >> 32)) << (ARG3)) & 0xFFFF0000UL)) << 32) | \
        ((uint64_t)((((uint32_t)(ARG1)) & 0x0000FFFFUL) | \
                    ((((uint32_t)(ARG2)) << (ARG3)) & 0xFFFF0000UL)) & 0xFFFFFFFFUL)))
  /**
   * \brief Halfword packing instruction on both 32-bit halves of a 64-bit value
   *        (variant used when __RISCV_XLEN is not 64).
   * \details In the low word, bits[31:16] of val1 are combined with bits[15:0] of
   *          val2 right-shifted by val3; in the high word, bits[63:48] of val1 are
   *          combined with bits[47:32] of the high word of val2 shifted the same
   *          way. The two 32-bit results are then packed into one 64-bit value.
   *          A shift of 0 expands to __RV_DPKTB16 and a shift of 16 to __RV_DPKTT16.
   * \param ARG1  val1, supplies the high halfword of each 32-bit result
   * \param ARG2  val2, each 32-bit word is shifted right by ARG3 independently
   * \param ARG3  val3, the shift amount
   * \note Arguments are fully parenthesized, so expression arguments are safe.
   *       They may be evaluated more than once; avoid side effects in them.
   */
  #define __PKHTB64(ARG1, ARG2, ARG3) \
      (((ARG3) == 0)  ? __RV_DPKTB16((ARG1), (ARG2)) : \
       ((ARG3) == 16) ? __RV_DPKTT16((ARG1), (ARG2)) : \
       (((uint64_t)((((uint32_t)(((uint64_t)(ARG1)) >> 32)) & 0xFFFF0000UL) | \
                    ((((uint32_t)(((uint64_t)(ARG2)) >> 32)) >> (ARG3)) & 0x0000FFFFUL)) << 32) | \
        ((uint64_t)((((uint32_t)(ARG1)) & 0xFFFF0000UL) | \
                    ((((uint32_t)(ARG2)) >> (ARG3)) & 0x0000FFFFUL)) & 0xFFFFFFFFUL)))
  24352. #endif /* __RISCV_XLEN == 64 */
  /**
   * \brief Rotate first, then extract.
   * \details The first argument is rotated with __ROR by the second argument and
   *          the rotated value is passed to __RV_SUNPKD820. Per the original
   *          note, this form suits the Arm compiler, which can rotate and
   *          extract in a single instruction.
   */
  #define __SXTB16_RORn(val, rot) __RV_SUNPKD820(__ROR(val, rot))
  24355. #endif /* defined(__DSP_PRESENT) && (__DSP_PRESENT == 1) */
  24356. #ifdef __cplusplus
  24357. }
  24358. #endif
  24359. #endif /* __CORE_FEATURE_DSP__ */